From 1715b6359c1ae37f24d6774f0bcd73b6bf839eaa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 6 Oct 2023 16:14:54 -0300 Subject: perf beauty socket/prctl_option: Cope with extended regexp complaint by grep Noticed on fedora 38, the extended regexp that so far was ok for both grep and sed now gets complaints by grep, that says '/' doesn't need to be escaped with '\'. So stop using '/' in sed, use '%' instead and remove the \ before / in the common extended regexp. Link: https://x.com/SMT_Solvers/status/1710380010098344192?s=20 Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZUEddFPTJHVLhH%2F6@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/prctl_option.sh | 4 ++-- tools/perf/trace/beauty/socket.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 8059342ca412..9455d9672f14 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,9 +4,9 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' +regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*/.*)?$' grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ - sed -r "s/$regex/\2 \1/g" | \ + sed -E "s%$regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh index 8bc7ba62203e..670c6db298ae 100755 --- a/tools/perf/trace/beauty/socket.sh +++ b/tools/perf/trace/beauty/socket.sh @@ -18,10 +18,10 @@ grep -E $ipproto_regex ${uapi_header_dir}/in.h | \ printf "};\n\n" printf "static const char *socket_level[] = {\n" -socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?' +socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+/.*)?' grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \ - sed -r "s/$socket_level_regex/\2 \1/g" | \ + sed -E "s%$socket_level_regex%\2 \1%g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" printf "};\n\n" -- cgit v1.2.3 From c8e3ade38bc6545faece71cc6c642ad744d4cea3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 6 Oct 2023 18:11:05 -0300 Subject: perf tests make: Remove the last egrep call, use 'grep -E' instead One last case, caught while testing with amazonlinux:2, centos:stream, etc: 4 7.28 amazonlinux:2 : FAIL egrep: warning: egrep is obsolescent; using grep -E gcc version 7.3.1 20180712 (Red Hat 7.3.1-17) (GCC) 8 13.87 centos:stream : FAIL egrep: warning: egrep is obsolescent; using grep -E Reviewed-by: Guilherme Amadio Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZUEdtblE8qDAQkBK@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/make | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/make b/tools/perf/tests/make index d9945ed25bc5..8a4da7eb637a 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -183,7 +183,7 @@ run += make_install_prefix_slash # run += make_install_pdf run += make_minimal -old_libbpf := $(shell echo '\#include ' | $(CC) -E -dM -x c -| egrep -q "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") +old_libbpf := $(shell echo '\#include ' | $(CC) -E -dM -x c -| grep -q -E "define[[:space:]]+LIBBPF_MAJOR_VERSION[[:space:]]+0{1}") ifneq ($(old_libbpf),) run += make_libbpf_dynamic -- cgit v1.2.3 From 851bbccf6b0c152d98ecf0ec83d75fc97aebf43c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 11 Oct 2023 18:14:52 -0300 Subject: perf build: Warn about missing libelf before warning about missing libbpf As libelf is a requirement for libbpf if it is not available, as in some container build tests where NO_LIBELF=1 is used, then better warn about the most basic library first. Ditto for libz, check its availability before libbpf too. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZUEehyDk0FkPnvMR@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b3e6ed10f40c..8b6cffbc4858 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -680,15 +680,15 @@ ifndef BUILD_BPF_SKEL endif ifeq ($(BUILD_BPF_SKEL),1) - ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) - dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) - BUILD_BPF_SKEL := 0 - else ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) + ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool) BUILD_BPF_SKEL := 0 else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),) dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool) BUILD_BPF_SKEL := 0 + else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),) + dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required) + BUILD_BPF_SKEL := 0 else ifeq ($(call get-executable,$(CLANG)),) dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing) BUILD_BPF_SKEL := 0 -- cgit v1.2.3 From 76db7aab1fca6688ddf9f388157521c442e0ffb8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:24 -0700 Subject: tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel Sync the new sample type for the branch counters feature. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tinghao Zhang Link: https://lore.kernel.org/r/20231025201626.3000228-6-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 39c6a250dd1b..3a64499b0f5d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -204,6 +204,8 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -235,6 +237,8 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; @@ -982,6 +986,12 @@ enum perf_event_type { * { u64 nr; * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX * { u64 from, to, flags } lbr[nr]; + * # + * # The format of the counters is decided by the + * # "branch_counter_nr" and "branch_counter_width", + * # which are defined in the ABI. + * # + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi @@ -1427,6 +1437,9 @@ struct perf_branch_entry { reserved:31; }; +/* Size of used info bits in struct perf_branch_entry */ +#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 + union perf_sample_weight { __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) -- cgit v1.2.3 From ac9cd7245fffa0fc053afce3b345469e5afa533a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:25 -0700 Subject: perf header: Support num and width of branch counters To support the branch counters feature, the information of the maximum number of supported counters and the width of the counters is exposed in the sysfs caps folder. The perf tool can use the information to parse the logged counters in each branch. Store the information in the perf_env for later usage. Reviewed-by: Ian Rogers Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tinghao Zhang Link: https://lore.kernel.org/r/20231025201626.3000228-7-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.h | 5 +++++ tools/perf/util/header.c | 18 +++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4566c51f2fd9..48d7f8759a2a 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -46,6 +46,9 @@ struct hybrid_node { struct pmu_caps { int nr_caps; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; + char **caps; char *pmu_name; }; @@ -62,6 +65,8 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; int kernel_is_64_bit; int nr_cmdline; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e86b9439ffee..eeb96a1b63a7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3259,7 +3259,9 @@ static int process_compressed(struct feat_fd *ff, } static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, - char ***caps, unsigned int *max_branches) + char ***caps, unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) { char *name, *value, *ptr; u32 nr_pmu_caps, i; @@ -3294,6 +3296,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, if (!strcmp(name, "branches")) *max_branches = atoi(value); + if (!strcmp(name, "branch_counter_nr")) + *br_cntr_nr = atoi(value); + + if (!strcmp(name, "branch_counter_width")) + *br_cntr_width = atoi(value); + free(value); free(name); } @@ -3318,7 +3326,9 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, { int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + &ff->ph->env.max_branches, + &ff->ph->env.br_cntr_nr, + &ff->ph->env.br_cntr_width); if (!ret && !ff->ph->env.cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); @@ -3347,7 +3357,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) for (i = 0; i < nr_pmu; i++) { ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, &pmu_caps[i].caps, - &pmu_caps[i].max_branches); + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); if (ret) goto err; -- cgit v1.2.3 From ef5b6a542b1dbb718226a5f8208be09ef405983d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Nov 2023 04:40:35 -0500 Subject: selftests: kvm/s390x: use vm_create_barebones() This function does the same but makes it clearer why one would use the "____"-prefixed version of vm_create(). Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/s390x/cmma_test.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index c8e0a6495a63..626a2b8a2037 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -94,11 +94,6 @@ static void guest_dirty_test_data(void) ); } -static struct kvm_vm *create_vm(void) -{ - return ____vm_create(VM_MODE_DEFAULT); -} - static void create_main_memslot(struct kvm_vm *vm) { int i; @@ -157,7 +152,7 @@ static struct kvm_vm *create_vm_two_memslots(void) { struct kvm_vm *vm; - vm = create_vm(); + vm = vm_create_barebones(); create_memslots(vm); @@ -276,7 +271,7 @@ static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) static void test_migration_mode(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); struct kvm_vcpu *vcpu; u64 orig_psw; int rc; @@ -670,7 +665,7 @@ struct testdef { */ static int machine_has_cmma(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); int r; r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); -- cgit v1.2.3 From 9fbb4b02302b0ae618303565025412070d32f85e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 25 Oct 2023 13:16:26 -0700 Subject: perf tools: Add branch counter knob Add a new branch filter, "counter", for the branch counter option. It is used to mark the events which should be logged in the branch. If it is applied with the -j option, the counters of all the events should be logged in the branch. If the legacy kernel doesn't support the new branch sample type, switching off the branch counter filter. The stored counter values in each branch are displayed right after the regular branch stack information via perf report -D. Usage examples: # perf record -e "{branch-instructions,branch-misses}:S" -j any,counter Only the first event, branch-instructions, collect the LBR. Both branch-instructions and branch-misses are marked as logged events. The occurrences information of them can be found in the branch stack extension space of each branch. # perf record -e "{cpu/branch-instructions,branch_type=any/,cpu/branch-misses,branch_type=counter/}" Only the first event, branch-instructions, collect the LBR. Only the branch-misses event is marked as a logged event. Committer notes: I noticed 'perf test "Sample parsing"' failing, reported to the list and Kan provided a patch that checks if the evsel has a leader and that evsel->evlist is set, the comment in the source code further explains it. Reviewed-by: Ian Rogers Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tinghao Zhang Link: https://lore.kernel.org/r/20231025201626.3000228-8-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++++ tools/perf/util/evsel.c | 35 ++++++++++++++++++++++++++++++- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-branch-options.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + tools/perf/util/sample.h | 1 + tools/perf/util/session.c | 15 +++++++++++-- 7 files changed, 55 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1889f66addf2..6015fdd08fb6 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -445,6 +445,10 @@ following filters are defined: 4th-Gen Xeon+ server), the save branch type is unconditionally enabled when the taken branch stack sampling is enabled. - priv: save privilege state during sampling in case binary is not available later + - counter: save occurrences of the event since the last branch entry. Currently, the + feature is only supported by a newer CPU, e.g., Intel Sierra Forest and + later platforms. An error out is expected if it's used on the unsupported + kernel or CPUs. + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 72a5dfc38d38..a5da74e3a517 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.branch_counters) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { @@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel) * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.read_lost && + if (!perf_missing_features.branch_counters && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + return true; + } else if (!perf_missing_features.read_lost && (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { perf_missing_features.read_lost = true; pr_debug2("switching off PERF_FORMAT_LOST support\n"); @@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) return new_val; } +static inline bool evsel__has_branch_counters(const struct evsel *evsel) +{ + struct evsel *cur, *leader = evsel__leader(evsel); + + /* The branch counters feature only supports group */ + if (!leader || !evsel->evlist) + return false; + + evlist__for_each_entry(evsel->evlist, cur) { + if ((leader == evsel__leader(cur)) && + (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) + return true; + } + return false; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; + + if (evsel__has_branch_counters(evsel)) { + OVERFLOW_CHECK_u64(array); + + data->branch_stack_cntr = (u64 *)array; + sz = data->branch_stack->nr * sizeof(u64); + + OVERFLOW_CHECK(array, sz, max_size); + array = (void *)array + sz; + } } if (type & PERF_SAMPLE_REGS_USER) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d791316a1792..f19ac9f027ef 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -191,6 +191,7 @@ struct perf_missing_features { bool code_page_size; bool weight_struct; bool read_lost; + bool branch_counters; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index fd67d204d720..f7f7aff3d85a 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), BRANCH_END }; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 2247991451f3..8f04d3b7f3ec 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), + bit_name(COUNTERS), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index c92ad0f51ecd..70b2c3135555 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -113,6 +113,7 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + u64 *branch_stack_cntr; struct regs_dump user_regs; struct regs_dump intr_regs; struct stack_dump user_stack; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1e9aa8ed15b6..4a094ab0362b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel, i, callchain->ips[i]); } -static void branch_stack__printf(struct perf_sample *sample, bool callstack) +static void branch_stack__printf(struct perf_sample *sample, + struct evsel *evsel) { struct branch_entry *entries = perf_sample__branch_entries(sample); + bool callstack = evsel__has_branch_callstack(evsel); + u64 *branch_stack_cntr = sample->branch_stack_cntr; + struct perf_env *env = evsel__env(evsel); uint64_t i; if (!callstack) { @@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } } + + if (branch_stack_cntr) { + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", + sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); + } } static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) @@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, callchain__printf(evsel, sample); if (evsel__has_br_stack(evsel)) - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); + branch_stack__printf(sample, evsel); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample, arch); -- cgit v1.2.3 From 7ff7b7afe364af17362f2a08cccdc79905feb0bc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2023 08:49:11 +0100 Subject: perf tools: Fix spelling mistake "parametrized" -> "parameterized" There are spelling mistakes in comments and a pr_debug message. Fix them. Reviewed-by: Athira Jajeev Reviewed-by: Ian Rogers Signed-off-by: Colin Ian King Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: https://lore.kernel.org/r/20231003074911.220216-1-colin.i.king@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 4 ++-- tools/perf/tests/shell/stat_all_pmu.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f78be21a5999..e52f45c7c3d1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2549,7 +2549,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest if (strchr(ent->d_name, '.')) continue; - /* exclude parametrized ones (name contains '?') */ + /* exclude parameterized ones (name contains '?') */ n = snprintf(pmu_event, sizeof(pmu_event), "%s%s", path, ent->d_name); if (n >= PATH_MAX) { pr_err("pmu event name crossed PATH_MAX(%d) size\n", PATH_MAX); @@ -2578,7 +2578,7 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest fclose(file); if (is_event_parameterized == 1) { - pr_debug("skipping parametrized PMU event: %s which contains ?\n", pmu_event); + pr_debug("skipping parameterized PMU event: %s which contains ?\n", pmu_event); continue; } diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index c77955419173..d2a3506e0d19 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -4,7 +4,7 @@ set -e -# Test all PMU events; however exclude parametrized ones (name contains '?') +# Test all PMU events; however exclude parameterized ones (name contains '?') for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) -- cgit v1.2.3 From 1a27fc01700fbff2f205000edf0d1d315b5f85cc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 2 Nov 2023 10:56:44 -0700 Subject: perf record: Lazy load kernel symbols Commit 5b7ba82a75915e73 ("perf symbols: Load kernel maps before using") changed it so that loading a kernel DSO would cause the symbols for the DSO to be eagerly loaded. For 'perf record' this is overhead as the symbols won't be used. Add a field to 'struct symbol_conf' to control the behavior and disable it for 'perf record' and 'perf inject'. Reviewed-by: Adrian Hunter Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231102175735.2272696-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 6 ++++++ tools/perf/builtin-record.c | 2 ++ tools/perf/util/event.c | 4 ++-- tools/perf/util/symbol_conf.h | 3 ++- 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index c8cf2fdd9cff..eb3ef5c24b66 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2265,6 +2265,12 @@ int cmd_inject(int argc, const char **argv) "perf inject []", NULL }; + + if (!inject.itrace_synth_opts.set) { + /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ + symbol_conf.lazy_load_kernel_maps = true; + } + #ifndef HAVE_JITDUMP set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); #endif diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index dcf288a4fb9a..8ec818568662 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3989,6 +3989,8 @@ int cmd_record(int argc, const char **argv) # undef set_nobuild #endif + /* Disable eager loading of kernel symbols that adds overhead to perf record. */ + symbol_conf.lazy_load_kernel_maps = true; rec->opts.affinity = PERF_AFFINITY_SYS; rec->evlist = evlist__new(); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 923c0fb15122..68f45e9e63b6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -617,13 +617,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; } else { diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 0b589570d1d0..2b2fb9e224b0 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -42,7 +42,8 @@ struct symbol_conf { inline_name, disable_add2line_warn, buildid_mmap2, - guest_code; + guest_code, + lazy_load_kernel_maps; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3 From 9ffa6c7512ca7aaeb30e596e2c247cb1fae7123a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 2 Nov 2023 10:56:47 -0700 Subject: perf machine thread: Remove exited threads by default 'struct thread' values hold onto references to mmaps, DSOs, etc. When a thread exits it is necessary to clean all of this memory up by removing the thread from the machine's threads. Some tools require this doesn't happen, such as auxtrace events, 'perf report' if offcpu events exist or if a task list is being generated, so add a 'struct symbol_conf' member to make the behavior optional. When an exited thread is left in the machine's threads, mark it as exited. This change relates to commit 40826c45eb0b8856 ("perf thread: Remove notion of dead threads") . Dead threads were removed as they had a reference count of 0 and were difficult to reason about with the reference count checker. Here a thread is removed from threads when it exits, unless via symbol_conf the exited thread isn't remove and is marked as exited. Reference counting behaves as it normally does. Reviewed-by: Adrian Hunter Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231102175735.2272696-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +++++++ tools/perf/util/machine.c | 10 +++++++--- tools/perf/util/session.c | 5 +++++ tools/perf/util/symbol_conf.h | 3 ++- tools/perf/util/thread.h | 14 ++++++++++++++ 5 files changed, 35 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9cb1da2dc0c0..121a2781323c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1426,6 +1426,13 @@ int cmd_report(int argc, const char **argv) if (ret < 0) goto exit; + /* + * tasks_mode require access to exited threads to list those that are in + * the data file. Off-cpu events are synthesized after other events and + * reference exited threads. + */ + symbol_conf.keep_exited_threads = true; + annotation_options__init(&report.annotation_opts); ret = perf_config(report__config, &report); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 90c750150b19..a985d004aa8d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2157,9 +2157,13 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) - thread__put(thread); - + if (thread != NULL) { + if (symbol_conf.keep_exited_threads) + thread__set_exited(thread, /*exited=*/true); + else + machine__remove_thread(machine, thread); + } + thread__put(thread); return 0; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4a094ab0362b..199d3e8df315 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -115,6 +115,11 @@ static int perf_session__open(struct perf_session *session, int repipe_fd) return -1; } + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { + /* Auxiliary events may reference exited threads, hold onto dead ones. */ + symbol_conf.keep_exited_threads = true; + } + if (perf_data__is_pipe(data)) return 0; diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 2b2fb9e224b0..6040286e07a6 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -43,7 +43,8 @@ struct symbol_conf { disable_add2line_warn, buildid_mmap2, guest_code, - lazy_load_kernel_maps; + lazy_load_kernel_maps, + keep_exited_threads; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e79225a0ea46..0df775b5c110 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -36,13 +36,22 @@ struct thread_rb_node { }; DECLARE_RC_STRUCT(thread) { + /** @maps: mmaps associated with this thread. */ struct maps *maps; pid_t pid_; /* Not all tools update this */ + /** @tid: thread ID number unique to a machine. */ pid_t tid; + /** @ppid: parent process of the process this thread belongs to. */ pid_t ppid; int cpu; int guest_cpu; /* For QEMU thread */ refcount_t refcnt; + /** + * @exited: Has the thread had an exit event. Such threads are usually + * removed from the machine's threads but some events/tools require + * access to dead threads. + */ + bool exited; bool comm_set; int comm_len; struct list_head namespaces_list; @@ -189,6 +198,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread) return &RC_CHK_ACCESS(thread)->refcnt; } +static inline void thread__set_exited(struct thread *thread, bool exited) +{ + RC_CHK_ACCESS(thread)->exited = exited; +} + static inline bool thread__comm_set(const struct thread *thread) { return RC_CHK_ACCESS(thread)->comm_set; -- cgit v1.2.3 From 89d5c48c34c8a552acd9a2e3ee504b2f06879fea Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:55:41 -0700 Subject: perf test: Simplify "object code reading" test It tries cycles (or cpu-clock on s390) event with exclude_kernel bit to open. But other arch on a VM can fail with the hardware event and need to fallback to the software event in the same way. So let's get rid of the cpuid check and use generic fallback mechanism using an array of event candidates. Now event in the odd index excludes the kernel so use that for the return value. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Tested-by: James Clark Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20231103195541.67788-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 76 +++++++++++++---------------------------- 1 file changed, 23 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 3af81012014e..d6e845c57902 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -511,38 +511,6 @@ static void fs_something(void) } } -#ifdef __s390x__ -#include "header.h" // for get_cpuid() -#endif - -static const char *do_determine_event(bool excl_kernel) -{ - const char *event = excl_kernel ? "cycles:u" : "cycles"; - -#ifdef __s390x__ - char cpuid[128], model[16], model_c[16], cpum_cf_v[16]; - unsigned int family; - int ret, cpum_cf_a; - - if (get_cpuid(cpuid, sizeof(cpuid))) - goto out_clocks; - ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c, - model, cpum_cf_v, &cpum_cf_a); - if (ret != 5) /* Not available */ - goto out_clocks; - if (excl_kernel && (cpum_cf_a & 4)) - return event; - if (!excl_kernel && (cpum_cf_a & 2)) - return event; - - /* Fall through: missing authorization */ -out_clocks: - event = excl_kernel ? "cpu-clock:u" : "cpu-clock"; - -#endif - return event; -} - static void do_something(void) { fs_something(); @@ -583,8 +551,10 @@ static int do_test_code_reading(bool try_kcore) int err = -1, ret; pid_t pid; struct map *map; - bool have_vmlinux, have_kcore, excl_kernel = false; + bool have_vmlinux, have_kcore; struct dso *dso; + const char *events[] = { "cycles", "cycles:u", "cpu-clock", "cpu-clock:u", NULL }; + int evidx = 0; pid = getpid(); @@ -618,7 +588,7 @@ static int do_test_code_reading(bool try_kcore) /* No point getting kernel events if there is no kernel object */ if (!have_vmlinux && !have_kcore) - excl_kernel = true; + evidx++; threads = thread_map__new_by_tid(pid); if (!threads) { @@ -646,7 +616,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - while (1) { + while (events[evidx]) { const char *str; evlist = evlist__new(); @@ -657,7 +627,7 @@ static int do_test_code_reading(bool try_kcore) perf_evlist__set_maps(&evlist->core, cpus, threads); - str = do_determine_event(excl_kernel); + str = events[evidx]; pr_debug("Parsing event '%s'\n", str); ret = parse_event(evlist, str); if (ret < 0) { @@ -675,32 +645,32 @@ static int do_test_code_reading(bool try_kcore) ret = evlist__open(evlist); if (ret < 0) { - if (!excl_kernel) { - excl_kernel = true; - /* - * Both cpus and threads are now owned by evlist - * and will be freed by following perf_evlist__set_maps - * call. Getting reference to keep them alive. - */ - perf_cpu_map__get(cpus); - perf_thread_map__get(threads); - perf_evlist__set_maps(&evlist->core, NULL, NULL); - evlist__delete(evlist); - evlist = NULL; - continue; - } + evidx++; - if (verbose > 0) { + if (events[evidx] == NULL && verbose > 0) { char errbuf[512]; evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); } - goto out_put; + /* + * Both cpus and threads are now owned by evlist + * and will be freed by following perf_evlist__set_maps + * call. Getting reference to keep them alive. + */ + perf_cpu_map__get(cpus); + perf_thread_map__get(threads); + perf_evlist__set_maps(&evlist->core, NULL, NULL); + evlist__delete(evlist); + evlist = NULL; + continue; } break; } + if (events[evidx] == NULL) + goto out_put; + ret = evlist__mmap(evlist, UINT_MAX); if (ret < 0) { pr_debug("evlist__mmap failed\n"); @@ -721,7 +691,7 @@ static int do_test_code_reading(bool try_kcore) err = TEST_CODE_READING_NO_KERNEL_OBJ; else if (!have_vmlinux && !try_kcore) err = TEST_CODE_READING_NO_VMLINUX; - else if (excl_kernel) + else if (strstr(events[evidx], ":u")) err = TEST_CODE_READING_NO_ACCESS; else err = TEST_CODE_READING_OK; -- cgit v1.2.3 From de2c7eb59c342d1a61124caaf2993e325a9becb7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:03 -0700 Subject: perf annotate: Split branch stack cycles information out of 'struct annotation_line' The cycles info is used only when branch stack is provided. Separate them from 'struct annotation_line' into a separate struct and lazy allocate them to save some memory. Committer notes: Make annotation__compute_ipc() check if the lazy allocation works, bailing out if so, its callers already do error checking and propagation. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/util/annotate.c | 61 ++++++++++++++++++++++++++++----------- tools/perf/util/annotate.h | 15 ++++++---- 3 files changed, 54 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ccdb2cd11fbf..d2470f87344d 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -337,7 +337,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = percent; } - if (max_percent < 0.01 && pos->al.ipc == 0) { + if (max_percent < 0.01 && (!pos->al.cycles || pos->al.cycles->ipc == 0)) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 82956adf9963..99ff3bb9cad8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1100,8 +1100,8 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 for (offset = start; offset <= end; offset++) { struct annotation_line *al = notes->offsets[offset]; - if (al && al->ipc == 0.0) { - al->ipc = ipc; + if (al && al->cycles && al->cycles->ipc == 0.0) { + al->cycles->ipc = ipc; cover_insn++; } } @@ -1114,12 +1114,13 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 } } -void annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size) { + int err = 0; s64 offset; if (!notes->src || !notes->src->cycles_hist) - return; + return 0; notes->total_insn = annotation__count_insn(notes, 0, size - 1); notes->hit_cycles = 0; @@ -1134,18 +1135,39 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) if (ch && ch->cycles) { struct annotation_line *al; + al = notes->offsets[offset]; + if (al && al->cycles == NULL) { + al->cycles = zalloc(sizeof(*al->cycles)); + if (al->cycles == NULL) { + err = ENOMEM; + break; + } + } if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); - al = notes->offsets[offset]; if (al && ch->num_aggr) { - al->cycles = ch->cycles_aggr / ch->num_aggr; - al->cycles_max = ch->cycles_max; - al->cycles_min = ch->cycles_min; + al->cycles->avg = ch->cycles_aggr / ch->num_aggr; + al->cycles->max = ch->cycles_max; + al->cycles->min = ch->cycles_min; } notes->have_cycles = true; } } + + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch = ¬es->src->cycles_hist[offset]; + + if (ch && ch->cycles) { + struct annotation_line *al = notes->offsets[offset]; + if (al) + zfree(&al->cycles); + } + } + } + annotation__unlock(notes); + return 0; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1225,6 +1247,7 @@ static void annotation_line__exit(struct annotation_line *al) { zfree_srcline(&al->path); zfree(&al->line); + zfree(&al->cycles); } static size_t disasm_line_size(int nr) @@ -3083,8 +3106,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles) { - if (al->ipc == 0.0 && al->cycles == 0) + if (notes->have_cycles && al->cycles) { + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else show_title = true; @@ -3121,17 +3144,17 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } if (notes->have_cycles) { - if (al->ipc) - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + if (al->cycles && al->cycles->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); if (!notes->options->show_minmax_cycle) { - if (al->cycles) + if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); @@ -3145,8 +3168,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati scnprintf(str, sizeof(str), "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", - al->cycles, al->cycles_min, - al->cycles_max); + al->cycles->avg, al->cycles->min, + al->cycles->max); obj__printf(obj, "%*s ", ANNOTATION__MINMAX_CYCLES_WIDTH - 1, @@ -3264,7 +3287,11 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, annotation__set_offsets(notes, size); annotation__mark_jump_targets(notes, sym); - annotation__compute_ipc(notes, size); + + err = annotation__compute_ipc(notes, size); + if (err) + goto out_free_offsets; + annotation__init_column_widths(notes, sym); notes->nr_events = nr_pcnt; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 962780559176..19bc2f039175 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -130,6 +130,13 @@ struct annotation_data { struct sym_hist_entry he; }; +struct cycles_info { + float ipc; + u64 avg; + u64 max; + u64 min; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -137,12 +144,9 @@ struct annotation_line { char *line; int line_nr; char *fileloc; - int jump_sources; - float ipc; - u64 cycles; - u64 cycles_max; - u64 cycles_min; char *path; + struct cycles_info *cycles; + int jump_sources; u32 idx; int idx_asm; int data_nr; @@ -325,7 +329,6 @@ static inline bool annotation_line__filter(struct annotation_line *al, struct an } void annotation__set_offsets(struct annotation *notes, s64 size); -void annotation__compute_ipc(struct annotation *notes, size_t size); void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); -- cgit v1.2.3 From b7f87e32590bf48eca84f729d3422be7b8dc22d3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:04 -0700 Subject: perf annotate: Split branch stack cycles info from 'struct annotation' The cycles info is only meaningful when sample has branch stacks. To save the memory for normal cases, move those fields to a new 'struct annotated_branch' and dynamically allocate it when needed. Also move cycles_hist from annotated_source as it's related here. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 99 ++++++++++++++++++++++++-------------------- tools/perf/util/annotate.h | 17 ++++---- tools/perf/util/block-info.c | 4 +- tools/perf/util/sort.c | 14 +++---- 4 files changed, 73 insertions(+), 61 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 99ff3bb9cad8..4e40c94c85d1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -810,7 +810,6 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src if (src == NULL) return; zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } @@ -845,18 +844,6 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -865,9 +852,10 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -958,23 +946,33 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } +static struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) - return NULL; - goto alloc_cycles_hist; - } + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + const size_t size = symbol__size(sym); - if (!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); } - return notes->src->cycles_hist; + return branch->cycles_hist; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -1083,6 +1081,14 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; @@ -1091,6 +1097,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ @@ -1106,10 +1113,11 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } @@ -1119,19 +1127,19 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) + if (!notes->branch || !notes->branch->cycles_hist) return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = ¬es->src->cycles_hist[offset]; + ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; @@ -1150,13 +1158,12 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) al->cycles->max = ch->cycles_max; al->cycles->min = ch->cycles_min; } - notes->have_cycles = true; } } if (err) { while (++offset < (s64)size) { - struct cyc_hist *ch = ¬es->src->cycles_hist[offset]; + struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al = notes->offsets[offset]; @@ -1322,6 +1329,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -3075,13 +3083,14 @@ call_like: static void ipc_coverage_string(char *bf, int size, struct annotation *notes) { double ipc = 0.0, coverage = 0.0; + struct annotated_branch *branch = annotation__get_branch(notes); - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)", @@ -3106,7 +3115,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int printed; if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles && al->cycles) { + if (notes->branch && al->cycles) { if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else @@ -3143,7 +3152,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } } - if (notes->have_cycles) { + if (notes->branch) { if (al->cycles && al->cycles->ipc) obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 19bc2f039175..508b93d3dcde 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -271,17 +271,20 @@ struct annotated_source { struct list_head source; int nr_histograms; size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; struct sym_hist *histograms; }; -struct LOCKABLE annotation { - u64 max_coverage; - u64 start; +struct annotated_branch { u64 hit_cycles; u64 hit_insn; unsigned int total_insn; unsigned int cover_insn; + struct cyc_hist *cycles_hist; +}; + +struct LOCKABLE annotation { + u64 max_coverage; + u64 start; struct annotation_options *options; struct annotation_line **offsets; int nr_events; @@ -297,8 +300,8 @@ struct LOCKABLE annotation { u8 max_addr; u8 max_ins_name; } widths; - bool have_cycles; struct annotated_source *src; + struct annotated_branch *branch; }; static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -312,10 +315,10 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && notes->options->show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385..08f82c1f166c 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -129,9 +129,9 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || !notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80e4f6132740..27b123ccd2d1 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -583,21 +583,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); -- cgit v1.2.3 From 2b215ec71b888ec2f3ca86846ce3a7f20b0d5e4d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:05 -0700 Subject: perf annotate: Move max_coverage from 'struct annotation' to 'struct annotated_branch' The max_coverage field is only used when branch stack info is available so it'd be natural to move to 'struct annotated_branch'. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 7 +++++-- tools/perf/util/annotate.c | 2 +- tools/perf/util/annotate.h | 4 +++- tools/perf/util/block-range.c | 7 ++++++- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index aeeb801f1ed7..a9129b51d511 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -94,6 +94,7 @@ static void process_basic_block(struct addr_map_symbol *start, struct annotation *notes = sym ? symbol__annotation(sym) : NULL; struct block_range_iter iter; struct block_range *entry; + struct annotated_branch *branch; /* * Sanity; NULL isn't executable and the CPU cannot execute backwards @@ -105,6 +106,8 @@ static void process_basic_block(struct addr_map_symbol *start, if (!block_range_iter__valid(&iter)) return; + branch = annotation__get_branch(notes); + /* * First block in range is a branch target. */ @@ -118,8 +121,8 @@ static void process_basic_block(struct addr_map_symbol *start, entry->coverage++; entry->sym = sym; - if (notes) - notes->max_coverage = max(notes->max_coverage, entry->coverage); + if (branch) + branch->max_coverage = max(branch->max_coverage, entry->coverage); } while (block_range_iter__next(&iter)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4e40c94c85d1..077a297f4dad 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -946,7 +946,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, return 0; } -static struct annotated_branch *annotation__get_branch(struct annotation *notes) +struct annotated_branch *annotation__get_branch(struct annotation *notes) { if (notes == NULL) return NULL; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 508b93d3dcde..849713098953 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -280,10 +280,10 @@ struct annotated_branch { unsigned int total_insn; unsigned int cover_insn; struct cyc_hist *cycles_hist; + u64 max_coverage; }; struct LOCKABLE annotation { - u64 max_coverage; u64 start; struct annotation_options *options; struct annotation_line **offsets; @@ -355,6 +355,8 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, struct evsel *evsel); +struct annotated_branch *annotation__get_branch(struct annotation *notes); + int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 680e92774d0c..15c42196c24c 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -311,6 +311,7 @@ done: double block_range__coverage(struct block_range *br) { struct symbol *sym; + struct annotated_branch *branch; if (!br) { if (block_ranges.blocks) @@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br) if (!sym) return -1; - return (double)br->coverage / symbol__annotation(sym)->max_coverage; + branch = symbol__annotation(sym)->branch; + if (!branch) + return -1; + + return (double)br->coverage / branch->max_coverage; } -- cgit v1.2.3 From 0aae4c99c5f8f748c6cb5ca03bb3b3ae8cfb10df Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:06 -0700 Subject: perf annotate: Move some source code related fields from 'struct annotation' to 'struct annotated_source' Some fields in the 'struct annotation' are only used with 'struct annotated_source' so better to be moved there in order to reduce memory consumption for other symbols. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 12 ++++++------ tools/perf/util/annotate.c | 17 +++++++++-------- tools/perf/util/annotate.h | 14 +++++++------- 3 files changed, 22 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d2470f87344d..1b42db70c998 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -384,7 +384,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx; - browser->b.nr_entries = notes->nr_entries; + browser->b.nr_entries = notes->src->nr_entries; notes->options->hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; @@ -402,7 +402,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) if (al->idx_asm < offset) offset = al->idx_asm; - browser->b.nr_entries = notes->nr_asm_entries; + browser->b.nr_entries = notes->src->nr_asm_entries; notes->options->hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; @@ -435,7 +435,7 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); ui_browser__reset_index(browser); - browser->nr_entries = notes->nr_asm_entries; + browser->nr_entries = notes->src->nr_asm_entries; } static int sym_title(struct symbol *sym, struct map *map, char *title, @@ -860,7 +860,7 @@ show_help: browser->b.height, browser->b.index, browser->b.top_idx, - notes->nr_asm_entries); + notes->src->nr_asm_entries); } continue; case K_ENTER: @@ -991,8 +991,8 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ui_helpline__push("Press ESC to exit"); - browser.b.width = notes->max_line_len; - browser.b.nr_entries = notes->nr_entries; + browser.b.width = notes->src->max_line_len; + browser.b.nr_entries = notes->src->nr_entries; browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 077a297f4dad..23e68b1abc2f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2825,19 +2825,20 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__set_offsets(struct annotation *notes, s64 size) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, ¬es->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; + if (src->max_line_len < line_len) + src->max_line_len = line_len; + al->idx = src->nr_entries++; if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; + al->idx_asm = src->nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly * routines that comes with labels in the same column diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 849713098953..9eb7b6d3fe95 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -268,10 +268,13 @@ struct cyc_hist { * returns. */ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct sym_hist *histograms; + struct list_head source; + size_t sizeof_sym_hist; + struct sym_hist *histograms; + int nr_histograms; + int nr_entries; + int nr_asm_entries; + u16 max_line_len; }; struct annotated_branch { @@ -289,9 +292,6 @@ struct LOCKABLE annotation { struct annotation_line **offsets; int nr_events; int max_jump_sources; - int nr_entries; - int nr_asm_entries; - u16 max_line_len; struct { u8 addr; u8 jumps; -- cgit v1.2.3 From b753d48f53f9dcea655d359f028c8adfdd9504b5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 3 Nov 2023 12:19:07 -0700 Subject: perf annotate: Move offsets array from 'struct annotation' to 'struct annotated_source' The offsets array keeps pointers to 'struct annotation_line' entries which are available in the 'struct annotated_source'. Let's move it to there. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Christophe JAILLET Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231103191907.54531-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 4 ++-- tools/perf/util/annotate.c | 20 ++++++++++---------- tools/perf/util/annotate.h | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1b42db70c998..163f916fff68 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -188,7 +188,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) * name right after the '<' token and probably treating this like a * 'call' instruction. */ - target = notes->offsets[cursor->ops.target.offset]; + target = notes->src->offsets[cursor->ops.target.offset]; if (target == NULL) { ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n", cursor->ops.target.offset); @@ -1006,6 +1006,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, out_free_offsets: if(not_annotated) - zfree(¬es->offsets); + zfree(¬es->src->offsets); return ret; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 23e68b1abc2f..9b68b8e3791c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1075,7 +1075,7 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 u64 offset; for (offset = start; offset <= end; offset++) { - if (notes->offsets[offset]) + if (notes->src->offsets[offset]) n_insn++; } return n_insn; @@ -1105,7 +1105,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 return; for (offset = start; offset <= end; offset++) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; if (al && al->cycles && al->cycles->ipc == 0.0) { al->cycles->ipc = ipc; @@ -1143,7 +1143,7 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) if (ch && ch->cycles) { struct annotation_line *al; - al = notes->offsets[offset]; + al = notes->src->offsets[offset]; if (al && al->cycles == NULL) { al->cycles = zalloc(sizeof(*al->cycles)); if (al->cycles == NULL) { @@ -1166,7 +1166,7 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; if (al) zfree(&al->cycles); } @@ -2800,7 +2800,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) return; for (offset = 0; offset < size; ++offset) { - struct annotation_line *al = notes->offsets[offset]; + struct annotation_line *al = notes->src->offsets[offset]; struct disasm_line *dl; dl = disasm_line(al); @@ -2808,7 +2808,7 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (!disasm_line__is_valid_local_jump(dl, sym)) continue; - al = notes->offsets[dl->ops.target.offset]; + al = notes->src->offsets[dl->ops.target.offset]; /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we @@ -2847,7 +2847,7 @@ void annotation__set_offsets(struct annotation *notes, s64 size) * E.g. copy_user_generic_unrolled */ if (al->offset < size) - notes->offsets[al->offset] = al; + notes->src->offsets[al->offset] = al; } else al->idx_asm = -1; } @@ -3280,8 +3280,8 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, size_t size = symbol__size(sym); int nr_pcnt = 1, err; - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); - if (notes->offsets == NULL) + notes->src->offsets = zalloc(size * sizeof(struct annotation_line *)); + if (notes->src->offsets == NULL) return ENOMEM; if (evsel__is_group_event(evsel)) @@ -3311,7 +3311,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, return 0; out_free_offsets: - zfree(¬es->offsets); + zfree(¬es->src->offsets); return err; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 9eb7b6d3fe95..de59c1aff08e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -271,6 +271,7 @@ struct annotated_source { struct list_head source; size_t sizeof_sym_hist; struct sym_hist *histograms; + struct annotation_line **offsets; int nr_histograms; int nr_entries; int nr_asm_entries; @@ -289,7 +290,6 @@ struct annotated_branch { struct LOCKABLE annotation { u64 start; struct annotation_options *options; - struct annotation_line **offsets; int nr_events; int max_jump_sources; struct { -- cgit v1.2.3 From 4a5aaaf308b91e3da21c3b8f7e78dc5a256d9324 Mon Sep 17 00:00:00 2001 From: zhaimingbing Date: Mon, 30 Oct 2023 15:58:25 +0800 Subject: perf tests attr: Fix spelling mistake "whic" to "which" There is a spelling mistake, Please fix it. Reviewed-by: Ian Rogers Signed-off-by: zhaimingbing Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: https://lore.kernel.org/r/20231030075825.3701-1-zhaimingbing@cmss.chinamobile.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 61186d0d1cfa..97e1bdd6ec0e 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -188,7 +188,7 @@ static int test__attr(struct test_suite *test __maybe_unused, int subtest __mayb if (perf_pmus__num_core_pmus() > 1) { /* * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type whic + * core PMU then each PMU will have a different type which * requires additional support. */ pr_debug("Skip test on hybrid systems"); -- cgit v1.2.3 From 36c70e44a37b84cbb4094bf6f5cdb01cfd6659df Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 30 Oct 2023 11:14:38 +0000 Subject: perf tools: Add the python_ext_build directory to .gitignore `python_ext_build` is the build directory for python.so, ignore it for cleaner git status. Signed-off-by: Yang Jihong Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231030111438.1357962-2-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f533e76fb480..ee5c14f3b8b1 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -49,3 +49,4 @@ libtraceevent/ libtraceevent_plugins/ fixdep Documentation/doc.dep +python_ext_build/ -- cgit v1.2.3 From b861fd7e0efc4dc2376e7212f59a9b32d1383c00 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 6 Nov 2023 10:16:27 +0100 Subject: perf tests offcpu: Adjust test case perf record offcpu profiling tests for s390 On s390 using linux-next the test case: 87: perf record offcpu profiling tests fails. The root cause is this command # ./perf record --off-cpu -e dummy -- ./perf bench sched messaging -l 10 # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.231 [sec] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.077 MB perf.data (401 samples) ] # It does not generate 800+ sample entries, on s390 usually around 40[1-9], sometimes a few more, but never more than 450. The higher the number of CPUs the lower the number of samples. Looking at function chain: bench_sched_messaging() +--> group() the senders and receiver threads are created. The senders and receivers call function ready() which writes one bytes and wait for a reply using poll system() call. As context switches are counted, the function ready() will trigger a context switch when no input data is available after the write system call. The write system call does not trigger context switches when the data size is small. And writing 1000 bytes (10 iterations with 100 bytes) is not much and certainly won't block. The 400+ context switch on s390 occur when the some receiver/sender threads call ready() and wait for the response from function bench_sched_messaging() being kicked off. Lower the number of expected context switches to 400 to succeed on s390. Suggested-by: Namhyung Kim Signed-off-by: Ilya Leoshkevich Signed-off-by: Thomas Richter Acked-by: Namhyung Kim Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Co-developed-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/20231106091627.2022530-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record_offcpu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index a1ef8f0d2b5c..67c925f3a15a 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -77,9 +77,9 @@ test_offcpu_child() { err=1 return fi - # each process waits for read and write, so it should be more than 800 events + # each process waits at least for poll, so it should be more than 400 events if ! perf report -i ${perfdata} -s comm -q -n -t ';' --percent-limit=90 | \ - awk -F ";" '{ if (NF > 3 && int($3) < 800) exit 1; }' + awk -F ";" '{ if (NF > 3 && int($3) < 400) exit 1; }' then echo "Child task off-cpu test [Failed invalid output]" err=1 -- cgit v1.2.3 From 33ce9fc4f8ddba30b7040bd01cea11080f40b344 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 6 Nov 2023 15:10:48 +0000 Subject: perf test: Add option to change objdump binary All of the other Perf subcommands that use objdump have an option to specify the binary, so add the same option to 'perf test'. This is useful if you have built the kernel with a different toolchain to the system one, where the system objdump may fail to disassemble vmlinux. Now this can be fixed with something like this: $ perf test --objdump llvm-objdump "object code reading" Reviewed-by: Ian Rogers Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Fangrui Song Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Yang Jihong Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20231106151051.129440-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 3 +++ tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/tests.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cb6f1dd00dc4..a8d17dd50588 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -32,6 +32,7 @@ static bool dont_fork; const char *dso_to_test; +const char *test_objdump_path = "objdump"; /* * List of architecture specific tests. Not a weak symbol as the array length is @@ -529,6 +530,8 @@ int cmd_test(int argc, const char **argv) "Do not fork for testcase"), OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), + OPT_STRING(0, "objdump", &test_objdump_path, "path", + "objdump binary to use for disassembly and annotations"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index d6e845c57902..8620146d0378 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -185,7 +185,7 @@ static int read_via_objdump(const char *filename, u64 addr, void *buf, int ret; fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s"; - ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len, + ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len, filename); if (ret <= 0 || (size_t)ret >= sizeof(cmd)) return -1; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b394f3ac2d66..dad3d7414142 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -207,5 +207,6 @@ DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); extern const char *dso_to_test; +extern const char *test_objdump_path; #endif /* TESTS_H */ -- cgit v1.2.3 From 6aad765d10c5cd8a62b258c359bae643ab2d45da Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 6 Nov 2023 15:10:49 +0000 Subject: perf test: Add support for setting objdump binary via perf config Add a 'perf config' variable that does the same thing as "perf test --objdump ". Also update the man page. Committer testing: # perf config test.objdump # perf test "object code reading" 26: Object code reading : Ok # perf config test.objdump=blah # perf config test.objdump test.objdump=blah # perf test "object code reading" 26: Object code reading : FAILED! # perf test -v "object code reading" 26: Object code reading : --- start --- test child forked, pid 600599 Looking at the vmlinux_path (8 entries long) Using /proc/kcore for kernel data Using /proc/kallsyms for symbols Parsing event 'cycles' Using CPUID AuthenticAMD-25-21-0 mmap size 528384B Reading object code for memory address: 0x4d9a02 File is: /home/acme/bin/perf On file address is: 0xd9a02 Objdump command is: blah -z -d --start-address=0x4d9a02 --stop-address=0x4d9a82 /home/acme/bin/perf objdump read too few bytes: 128 Bytes read differ from those read by objdump buf1 (dso): 0x48 0x85 0xff 0x74 0x29 0xe8 0x94 0xdf 0x07 0x00 0x8b 0x73 0x1c 0x48 0x8b 0x43 0x08 0xeb 0xa5 0x0f 0x1f 0x00 0x48 0x8b 0x45 0xe8 0x64 0x48 0x2b 0x04 0x25 0x28 0x00 0x00 0x00 0x75 0x0f 0x48 0x8b 0x5d 0xf8 0xc9 0xc3 0x0f 0x1f 0x00 0x48 0x8b 0x43 0x08 0xeb 0x84 0xe8 0xc5 0x3e 0xf3 0xff 0x0f 0x1f 0x44 0x00 0x00 0x55 0x48 0x89 0xe5 0x41 0x56 0x41 0x55 0x49 0x89 0xd5 0x41 0x54 0x49 0x89 0xfc 0x53 0x48 0x89 0xf3 0x48 0x83 0xec 0x30 0x48 0x8b 0x7e 0x20 0x64 0x48 0x8b 0x04 0x25 0x28 0x00 0x00 0x00 0x48 0x89 0x45 0xd8 0x31 0xc0 0x48 0x89 0x75 0xb0 0x48 0xc7 0x45 0xb8 0x00 0x00 0x00 0x00 0x48 0xc7 0x45 0xc0 0x00 0x00 0x00 0x00 0xe8 0xad 0xfa buf2 (objdump): 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 test child finished with -1 ---- end ---- Object code reading: FAILED! # perf config test.objdump=/usr/bin/objdump # perf config test.objdump test.objdump=/usr/bin/objdump # perf test "object code reading" 26: Object code reading : Ok # Signed-off-by: James Clark Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Fangrui Song Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Tom Rix Cc: Yang Jihong Cc: Yonghong Song Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20231106151051.129440-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 4 ++++ tools/perf/tests/builtin-test.c | 12 ++++++++++++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 0b4e79dbd3f6..16398babd1ef 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -722,6 +722,10 @@ session-.*:: Defines new record session for daemon. The value is record's command line without the 'record' keyword. +test.*:: + + test.objdump:: + objdump binary to use for disassembly and annotations. SEE ALSO -------- diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index a8d17dd50588..113e92119e1d 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,6 +14,7 @@ #include #include #include "builtin.h" +#include "config.h" #include "hist.h" #include "intlist.h" #include "tests.h" @@ -514,6 +515,15 @@ static int run_workload(const char *work, int argc, const char **argv) return -1; } +static int perf_test__config(const char *var, const char *value, + void *data __maybe_unused) +{ + if (!strcmp(var, "test.objdump")) + test_objdump_path = value; + + return 0; +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -541,6 +551,8 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + perf_config(perf_test__config, NULL); + /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); -- cgit v1.2.3 From 6512b6aa237db36d881a81cc312db39668e61853 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 2 Nov 2023 10:56:54 -0700 Subject: perf bpf: Don't synthesize BPF events when disabled If BPF sideband events are disabled on the command line, don't synthesize BPF events too. Signed-off-by: Ian Rogers Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231102175735.2272696-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 38fcf3ba5749..830711cae30d 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -386,6 +386,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, int err; int fd; + if (opts->no_bpf_event) + return 0; + event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; -- cgit v1.2.3 From a399ee6773d6a0203f9bd764f8bd9d978878cef1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2023 16:34:09 -0300 Subject: tools: Disable __packed attribute compiler warning due to -Werror=attributes Noticed on several perf tools cross build test containers: [perfbuilder@five ~]$ grep FAIL ~/dm.log/summary 19 10.18 debian:experimental-x-mips : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 20 11.21 debian:experimental-x-mips64 : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 21 11.30 debian:experimental-x-mipsel : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 37 12.07 ubuntu:18.04-x-arm : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 42 11.91 ubuntu:18.04-x-riscv64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 44 13.17 ubuntu:18.04-x-sh4 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 45 12.09 ubuntu:18.04-x-sparc64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) [perfbuilder@five ~]$ In file included from util/intel-pt-decoder/intel-pt-pkt-decoder.c:10: /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h: In function 'get_unaligned_le16': /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h:13:29: error: packed attribute causes inefficient alignment for 'x' [-Werror=attributes] 13 | const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ | ^ /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h:27:28: note: in expansion of macro '__get_unaligned_t' 27 | return le16_to_cpu(__get_unaligned_t(__le16, p)); | ^~~~~~~~~~~~~~~~~ This comes from the kernel, where the -Wattributes and -Wpacked isn't used, -Wpacked is already disabled, do it for the attributes as well. Fixes: a91c987254651443 ("perf tools: Add get_unaligned_leNN()") Suggested-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/7c5b626c-1de9-4c12-a781-e44985b4a797@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/unaligned.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h index 156743d399ae..2fd551915c20 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/asm-generic/unaligned.h @@ -8,6 +8,7 @@ */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" #define __get_unaligned_t(type, ptr) ({ \ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ -- cgit v1.2.3 From fac85c291e141a67fce46bdce01f9ee33aafabfe Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:13 +0530 Subject: selftests/bpf: Convert CHECK macros to ASSERT_* macros in bpf_iter As it was pointed out by Yonghong Song [1], in the bpf selftests the use of the ASSERT_* series of macros is preferred over the CHECK macro. This patch replaces all CHECK calls in bpf_iter with the appropriate ASSERT_* macros. [1] https://lore.kernel.org/lkml/0a142924-633c-44e6-9a92-2dc019656bf2@linux.dev Suggested-by: Yonghong Song Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835E9C8DFCA226DD6FEF914E8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 79 ++++++++++------------- 1 file changed, 35 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e3498f607b49..5e334d3d7ac2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -413,7 +411,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +524,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +569,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +597,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +648,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. */ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +680,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -714,16 +707,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +723,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +826,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -917,7 +907,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +973,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1092,7 +1079,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1118,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1139,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. + */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1196,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1281,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ -- cgit v1.2.3 From bf4a64b9323f181df8aba32d66cb37b9fa5df959 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:14 +0530 Subject: selftests/bpf: Add malloc failure checks in bpf_iter Since some malloc calls in bpf_iter may at times fail, this patch adds the appropriate fail checks, and ensures that any previously allocated resource is appropriately destroyed before returning the function. Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835F0ECA792265FA41FC39BE8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5e334d3d7ac2..4e02093c2cbe 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -698,7 +698,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -868,6 +868,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -1044,6 +1046,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) -- cgit v1.2.3 From 2b62aa59d02ed281fa4fc218df3ca91b773e1e62 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:43 -0700 Subject: selftests/bpf: fix RELEASE=1 build for tc_opts Compiler complains about malloc(). We also don't need to dynamically allocate anything, so make the life easier by using statically sized buffer. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 51883ccb8020..196abf223465 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } -- cgit v1.2.3 From f4c7e887324f5776eef6e6e47a90e0ac8058a7a8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:44 -0700 Subject: selftests/bpf: satisfy compiler by having explicit return in btf test Some compilers complain about get_pprint_mapv_size() not returning value in some code paths. Fix with explicit return. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe5..8fb4a04fbbc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, -- cgit v1.2.3 From d79924ca579c647d5dc55f605899c98f7ea04d0f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:53 +0800 Subject: selftests/bpf: Use value with enough-size when updating per-cpu map When updating per-cpu map in map_percpu_stats test, patch_map_thread() only passes 4-bytes-sized value to bpf_map_update_elem(). The expected size of the value is 8 * num_possible_cpus(), so fix it by passing a value with enough-size for per-cpu map update. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/map_tests/map_percpu_stats.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8bf497a9843e..a98d6b94dd02 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,6 +131,12 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; @@ -150,17 +156,26 @@ static int create_small_hash(void) static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) -- cgit v1.2.3 From b9b79553163788d3fc42e25c2662c0a46dc9a3c5 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:54 +0800 Subject: selftests/bpf: Export map_update_retriable() Export map_update_retriable() to make it usable for other map_test cases. These cases may only need retry for specific errno, so add a new callback parameter to let map_update_retriable() decide whether or not the errno is retriable. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_maps.c | 17 ++++++++++++----- tools/testing/selftests/bpf/test_maps.h | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4d..767e0693df10 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732..e4ac704a536c 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include #include +#include #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif -- cgit v1.2.3 From 2f553b032cad4993969cab356b3b0e306fcd1cd1 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:55 +0800 Subject: selftsets/bpf: Retry map update for non-preallocated per-cpu map BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1]. It seems that the failure reason is per-cpu bpf memory allocator may not be able to allocate per-cpu pointer successfully and it can not refill free llist timely, and bpf_map_update_elem() will return -ENOMEM. So mitigate the problem by retrying the update operation for non-preallocated per-cpu map. [1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909 Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/map_tests/map_percpu_stats.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index a98d6b94dd02..2ea36408816b 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -141,6 +141,7 @@ struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -154,6 +155,11 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ @@ -175,7 +181,12 @@ static void *patch_map_thread(void *arg) val_ptr = &val; } - ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -296,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. Check values, then delete all elements and check -- cgit v1.2.3 From b0cf0dcde8cae24571b1f382e81328229e475604 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Tue, 31 Oct 2023 14:27:17 -0700 Subject: selftests/bpf: Consolidate VIRTIO/9P configs in config.vm file Those configs are needed to be able to run VM somewhat consistently. For instance, ATM, s390x is missing the `CONFIG_VIRTIO_CONSOLE` which prevents s390x kernels built in CI to leverage qemu-guest-agent. By moving them to `config,vm`, we should have selftest kernels which are equal in term of VM functionalities when they include this file. The set of config unabled were picked using grep -h -E '(_9P|_VIRTIO)' config.x86_64 config | sort | uniq added to `config.vm` and then grep -vE '(_9P|_VIRTIO)' config.{x86_64,aarch64,s390x} as a side-effect, some config may have disappeared to the aarch64 and s390x kernels, but they should not be needed. CI will tell. Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231031212717.4037892-1-chantr4@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config.aarch64 | 16 ---------------- tools/testing/selftests/bpf/config.s390x | 9 --------- tools/testing/selftests/bpf/config.vm | 12 ++++++++++++ tools/testing/selftests/bpf/config.x86_64 | 12 ------------ tools/testing/selftests/bpf/vmtest.sh | 4 +++- 5 files changed, 15 insertions(+), 38 deletions(-) create mode 100644 tools/testing/selftests/bpf/config.vm (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 253821494884..fa8ecf626c73 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -46,7 +45,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +65,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +96,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +135,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +161,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be35..e93330382849 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 000000000000..a9746ca78777 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..f7bfb2b09c82 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 685034528018..65d14f3bbe30 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" + "tools/testing/selftests/bpf/config.vm" + "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" -- cgit v1.2.3 From a46afaa03f6db8c65492302ffdafcb2e769e5667 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Fri, 3 Nov 2023 09:11:26 +0100 Subject: bpftool: Fix prog object type in manpage bpftool's man page lists "program" as one of possible values for OBJECT, while in fact bpftool accepts "prog" instead. Reported-by: Jerry Snitselaar Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20231103081126.170034-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdaf..09e4f2ff5658 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -20,7 +20,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | + *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | **btf** | **gen** | **struct_ops** | **iter** } *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } -- cgit v1.2.3 From f2d2c7e1b7c9e8847478769d6e1f8a76b5e91952 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 3 Nov 2023 23:09:12 +0100 Subject: selftests/bpf: Disable CONFIG_DEBUG_INFO_REDUCED in config.aarch64 Building an arm64 kernel and seftests/bpf with defconfig + selftests/bpf/config and selftests/bpf/config.aarch64 the fragment CONFIG_DEBUG_INFO_REDUCED is enabled in arm64's defconfig, it should be disabled in file sefltests/bpf/config.aarch64 since if its not disabled CONFIG_DEBUG_INFO_BTF wont be enabled. Signed-off-by: Anders Roxell Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231103220912.333930-1-anders.roxell@linaro.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config.aarch64 | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index fa8ecf626c73..29c8635c5722 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -36,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y -- cgit v1.2.3 From 7f7c43693c1b46652cfafb7af67ba31726d6ec4e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 7 Nov 2023 12:15:11 -0800 Subject: libbpf: Fix potential uninitialized tail padding with LIBBPF_OPTS_RESET Martin reported that there is a libbpf complaining of non-zero-value tail padding with LIBBPF_OPTS_RESET macro if struct bpf_netkit_opts is modified to have a 4-byte tail padding. This only happens to clang compiler. The commend line is: ./test_progs -t tc_netkit_multi_links Martin and I did some investigation and found this indeed the case and the following are the investigation details. Clang: clang version 18.0.0 tools/lib/bpf/libbpf_common.h: #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ memset(&NAME, 0, sizeof(NAME)); \ NAME = (typeof(NAME)) { \ .sz = sizeof(NAME), \ __VA_ARGS__ \ }; \ } while (0) #endif tools/lib/bpf/libbpf.h: struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; size_t :0; }; #define bpf_netkit_opts__last_field expected_revision In the above struct bpf_netkit_opts, there is no tail padding. prog_tests/tc_netkit.c: static void serial_test_tc_netkit_multi_links_target(int mode, int target) { ... LIBBPF_OPTS(bpf_netkit_opts, optl); ... LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc1), ); ... } Let us make the following source change, note that we have a 4-byte tailing padding now. diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..0dd83910ae9a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -803,13 +803,13 @@ bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; - __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; + __u32 flags; size_t :0; }; -#define bpf_netkit_opts__last_field expected_revision +#define bpf_netkit_opts__last_field flags The clang 18 generated asm code looks like below: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d 7d 98 leaq -0x68(%rbp), %rdi 55e7: 31 f6 xorl %esi, %esi 55e9: ba 20 00 00 00 movl $0x20, %edx 55ee: e8 00 00 00 00 callq 0x55f3 55f3: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 55fe: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5605: 48 8b 78 18 movq 0x18(%rax), %rdi 5609: e8 00 00 00 00 callq 0x560e 560e: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5614: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 561e: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5633: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563a: 48 89 45 98 movq %rax, -0x68(%rbp) 563e: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5645: 48 89 45 a0 movq %rax, -0x60(%rbp) 5649: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5650: 48 89 45 a8 movq %rax, -0x58(%rbp) 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); At -O0 level, the clang compiler creates an intermediate copy. We have below to store 'flags' with 4-byte store and leave another 4 byte in the same 8-byte-aligned storage undefined, 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) and later we store 8-byte to the original zero'ed buffer 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) This caused a problem as the 4-byte value at [%rbp-0x2dc, %rbp-0x2e0) may be garbage. gcc (gcc 11.4) does not have this issue as it does zeroing struct first before doing assignments: ; LIBBPF_OPTS_RESET(optl, 50fd: 48 8d 85 40 fc ff ff leaq -0x3c0(%rbp), %rax 5104: ba 20 00 00 00 movl $0x20, %edx 5109: be 00 00 00 00 movl $0x0, %esi 510e: 48 89 c7 movq %rax, %rdi 5111: e8 00 00 00 00 callq 0x5116 5116: 48 8b 45 f0 movq -0x10(%rbp), %rax 511a: 48 8b 40 18 movq 0x18(%rax), %rax 511e: 48 89 c7 movq %rax, %rdi 5121: e8 00 00 00 00 callq 0x5126 5126: 48 c7 85 40 fc ff ff 00 00 00 00 movq $0x0, -0x3c0(%rbp) 5131: 48 c7 85 48 fc ff ff 00 00 00 00 movq $0x0, -0x3b8(%rbp) 513c: 48 c7 85 50 fc ff ff 00 00 00 00 movq $0x0, -0x3b0(%rbp) 5147: 48 c7 85 58 fc ff ff 00 00 00 00 movq $0x0, -0x3a8(%rbp) 5152: 48 c7 85 40 fc ff ff 20 00 00 00 movq $0x20, -0x3c0(%rbp) 515d: 89 85 48 fc ff ff movl %eax, -0x3b8(%rbp) 5163: c7 85 58 fc ff ff 08 00 00 00 movl $0x8, -0x3a8(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); It is not clear how to resolve the compiler code generation as the compiler generates correct code w.r.t. how to handle unnamed padding in C standard. So this patch changed LIBBPF_OPTS_RESET macro to avoid uninitialized tail padding. We already knows LIBBPF_OPTS macro works on both gcc and clang, even with tail padding. So LIBBPF_OPTS_RESET is changed to be a LIBBPF_OPTS followed by a memcpy(), thus avoiding uninitialized tail padding. The below is asm code generated with this patch and with clang compiler: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d bd 10 fd ff ff leaq -0x2f0(%rbp), %rdi 55ea: 31 f6 xorl %esi, %esi 55ec: ba 20 00 00 00 movl $0x20, %edx 55f1: e8 00 00 00 00 callq 0x55f6 55f6: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 5601: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5608: 48 8b 78 18 movq 0x18(%rax), %rdi 560c: e8 00 00 00 00 callq 0x5611 5611: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5617: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 5621: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 562c: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5636: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563d: 48 89 45 98 movq %rax, -0x68(%rbp) 5641: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5648: 48 89 45 a0 movq %rax, -0x60(%rbp) 564c: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5653: 48 89 45 a8 movq %rax, -0x58(%rbp) 5657: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565e: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); In the above code, a temporary buffer is zeroed and then has proper value assigned. Finally, values in temporary buffer are copied to the original variable buffer, hence tail padding is guaranteed to be 0. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20231107201511.2548645-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_common.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f254486..8fe248e14eb6 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ -- cgit v1.2.3 From 5d4a7aaca1ebcc7c864caec13203662a061c4f4f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:29 -0800 Subject: veristat: add ability to sort by stat's absolute value Add ability to sort results by absolute values of specified stats. This is especially useful to find biggest deviations in comparison mode. When comparing verifier change effect against a large base of BPF object files, it's necessary to see big changes both in positive and negative directions, as both might be a signal for regressions or bugs. The syntax is natural, e.g., adding `-s '|insns_diff|'^` will instruct veristat to sort by absolute value of instructions difference in ascending order. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 68 ++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a..102914f70573 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -455,7 +458,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +492,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +538,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +663,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +674,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* || means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == '|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +737,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +752,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +760,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -1103,7 +1119,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1140,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? -1 : 1; break; @@ -1142,7 +1163,8 @@ static int cmp_prog_stats(const void *v1, const void *v2) int i, cmp; for (i = 0; i < env.sort_spec.spec_cnt; i++) { - cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], + env.sort_spec.asc[i], env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1211,7 +1233,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, static int cmp_join_stat(const struct verif_stats_join *s1, const struct verif_stats_join *s2, - enum stat_id id, enum stat_variant var, bool asc) + enum stat_id id, enum stat_variant var, + bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; double v1, v2; @@ -1220,6 +1243,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1, fetch_join_stat_value(s1, id, var, &str1, &v1); fetch_join_stat_value(s2, id, var, &str2, &v2); + if (abs) { + v1 = fabs(v1); + v2 = fabs(v2); + } + if (str1) cmp = strcmp(str1, str2); else if (v1 != v2) @@ -1237,7 +1265,8 @@ static int cmp_join_stats(const void *v1, const void *v2) cmp = cmp_join_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.variants[i], - env.sort_spec.asc[i]); + env.sort_spec.asc[i], + env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1720,6 +1749,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + if (f->abs) + value = fabs(value); + switch (f->op) { case OP_EQ: return value > f->value - eps && value < f->value + eps; case OP_NEQ: return value < f->value - eps || value > f->value + eps; @@ -1766,7 +1798,7 @@ static int handle_comparison_mode(void) struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j, last_idx; + int err, i, j, last_idx, cnt; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1879,7 +1911,7 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } - /* now sort joined results accorsing to sort spec */ + /* now sort joined results according to sort spec */ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); /* for human-readable table output we need to do extra pass to @@ -1896,16 +1928,22 @@ one_more_time: output_comp_headers(cur_fmt); last_idx = -1; + cnt = 0; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; if (!should_output_join_stats(join)) continue; + if (env.top_n && cnt >= env.top_n) + break; + if (cur_fmt == RESFMT_TABLE_CALCLEN) last_idx = i; output_comp_stats(join, cur_fmt, i == last_idx); + + cnt++; } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1920,6 +1958,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s { long value = stats->stats[f->stat_id]; + if (f->abs) + value = value < 0 ? -value : value; + switch (f->op) { case OP_EQ: return value == f->value; case OP_NEQ: return value != f->value; @@ -1964,7 +2005,7 @@ static bool should_output_stats(const struct verif_stats *stats) static void output_prog_stats(void) { const struct verif_stats *stats; - int i, last_stat_idx = 0; + int i, last_stat_idx = 0, cnt = 0; if (env.out_fmt == RESFMT_TABLE) { /* calculate column widths */ @@ -1984,7 +2025,10 @@ static void output_prog_stats(void) stats = &env.prog_stats[i]; if (!should_output_stats(stats)) continue; + if (env.top_n && cnt >= env.top_n) + break; output_stats(stats, env.out_fmt, i == last_stat_idx); + cnt++; } } -- cgit v1.2.3 From 27007fae704eb12547b9b5c7b1005e11640d4f19 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:30 -0800 Subject: veristat: add ability to filter top N results Add ability to filter top B results, both in replay/verifier mode and comparison mode. Just adding `-n10` will emit only first 10 rows, or less, if there is not enough rows. This is not just a shortcut instead of passing veristat output through `head`, though. Filtering out all the other rows influences final table formatting, as table column widths are calculated based on actual emitted test. To demonstrate the difference, compare two "equivalent" forms below, one using head and another using -n argument. TOP N FEATURE ============= [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' -n10 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ---------------------------------------- --------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) PIPING TO HEAD ============== [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' | head -n12 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ----------------------------------------------------- ---------------------------------------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) Note all the wasted whitespace in the "PIPING TO HEAD" variant. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 102914f70573..443a29fc6a62 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -149,6 +149,7 @@ static struct env { bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -215,6 +216,7 @@ static const struct argp_option opts[] = { { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, { "test-states", 't', NULL, 0, "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -293,6 +295,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } + break; case 'C': env.comparison_mode = true; break; -- cgit v1.2.3 From f460e7bdb027d1da93f0c5090b239889cd46a33d Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:35 -0800 Subject: selftests/bpf: Add test passing MAYBE_NULL reg to bpf_refcount_acquire The test added in this patch exercises the logic fixed in the previous patch in this series. Before the previous patch's changes, bpf_refcount_acquire accepts MAYBE_NULL local kptrs; after the change the verifier correctly rejects the such a call. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/refcounted_kptr_fail.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580..1553b9c16aa7 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -53,6 +53,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) return 0; } +SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) -- cgit v1.2.3 From e9ed8df7187cfdce1075d0ee591544ac15d072f1 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:39 -0800 Subject: selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld This patch demonstrates that verifier changes earlier in this series result in bpf_refcount_acquire(mapval->stashed_kptr) passing verification. The added test additionally validates that stashing a kptr in mapval and - in a separate BPF program - refcount_acquiring the kptr without unstashing works as expected at runtime. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-7-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/local_kptr_stash.c | 33 ++++++++++ .../testing/selftests/bpf/progs/local_kptr_stash.c | 71 ++++++++++++++++++++++ 2 files changed, 104 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba..e6e50a394472 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b8..1769fdff6aea 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 155addf0814a92d08fce26a11b27e3315cdba977 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 3 Nov 2023 19:49:00 -0700 Subject: bpf: Use named fields for certain bpf uapi structs Martin and Vadim reported a verifier failure with bpf_dynptr usage. The issue is mentioned but Vadim workarounded the issue with source change ([1]). The below describes what is the issue and why there is a verification failure. int BPF_PROG(skb_crypto_setup) { struct bpf_dynptr algo, key; ... bpf_dynptr_from_mem(..., ..., 0, &algo); ... } The bpf program is using vmlinux.h, so we have the following definition in vmlinux.h: struct bpf_dynptr { long: 64; long: 64; }; Note that in uapi header bpf.h, we have struct bpf_dynptr { long: 64; long: 64; } __attribute__((aligned(8))); So we lost alignment information for struct bpf_dynptr by using vmlinux.h. Let us take a look at a simple program below: $ cat align.c typedef unsigned long long __u64; struct bpf_dynptr_no_align { __u64 :64; __u64 :64; }; struct bpf_dynptr_yes_align { __u64 :64; __u64 :64; } __attribute__((aligned(8))); void bar(void *, void *); int foo() { struct bpf_dynptr_no_align a; struct bpf_dynptr_yes_align b; bar(&a, &b); return 0; } $ clang --target=bpf -O2 -S -emit-llvm align.c Look at the generated IR file align.ll: ... %a = alloca %struct.bpf_dynptr_no_align, align 1 %b = alloca %struct.bpf_dynptr_yes_align, align 8 ... The compiler dictates the alignment for struct bpf_dynptr_no_align is 1 and the alignment for struct bpf_dynptr_yes_align is 8. So theoretically compiler could allocate variable %a with alignment 1 although in reallity the compiler may choose a different alignment by considering other local variables. In [1], the verification failure happens because variable 'algo' is allocated on the stack with alignment 4 (fp-28). But the verifer wants its alignment to be 8. To fix the issue, the RFC patch ([1]) tried to add '__attribute__((aligned(8)))' to struct bpf_dynptr plus other similar structs. Andrii suggested that we could directly modify uapi struct with named fields like struct 'bpf_iter_num': struct bpf_iter_num { /* opaque iterator state; having __u64 here allows to preserve correct * alignment requirements in vmlinux.h, generated from BTF */ __u64 __opaque[1]; } __attribute__((aligned(8))); Indeed, adding named fields for those affected structs in this patch can preserve alignment when bpf program references them in vmlinux.h. With this patch, the verification failure in [1] can also be resolved. [1] https://lore.kernel.org/bpf/1b100f73-7625-4c1f-3ae5-50ecf84d3ff0@linux.dev/ [2] https://lore.kernel.org/bpf/20231103055218.2395034-1-yonghong.song@linux.dev/ Cc: Vadim Fedorenko Cc: Martin KaFai Lau Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231104024900.1539182-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 23 +++++++---------------- tools/include/uapi/linux/bpf.h | 23 +++++++---------------- 2 files changed, 14 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { -- cgit v1.2.3 From dd678532f913c1a742f1a2add6adacfb7ae2b166 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Nov 2023 14:03:31 +0530 Subject: perf header: Additional note on AMD IBS for max_precise pmu cap x86 core PMU exposes supported maximum precision level via max_precise PMU capability. Although, AMD core PMU does not support precise mode, certain core PMU events with precise_ip > 0 are allowed and forwarded to IBS OP PMU. Display a note about this in the 'perf report' header output and document the details in the perf-list man page. Signed-off-by: Ravi Bangoria Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Changbin Du Cc: Huacai Chen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kan Liang Cc: Kim Phillips Cc: Mark Rutland Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ross Zwisler Cc: Sandipan Das Cc: Santosh Shukla Cc: Yang Jihong Link: https://lore.kernel.org/r/20231107083331.901-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 12 +++++++----- tools/perf/util/env.c | 18 ++++++++++++++++++ tools/perf/util/env.h | 2 ++ tools/perf/util/header.c | 8 ++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index d5f78e125efe..1b90575ee3c8 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,11 +81,13 @@ For Intel systems precise event sampling is implemented with PEBS which supports up to precise-level 2, and precise level 3 for some special cases -On AMD systems it is implemented using IBS (up to precise-level 2). -The precise modifier works with event types 0x76 (cpu-cycles, CPU -clocks not halted) and 0xC1 (micro-ops retired). Both events map to -IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see the +On AMD systems it is implemented using IBS OP (up to precise-level 2). +Unlike Intel PEBS which provides levels of precision, AMD core pmu is +inherently non-precise and IBS is inherently precise. (i.e. ibs_op//, +ibs_op//p, ibs_op//pp and ibs_op//ppp are all same). The precise modifier +works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 +(micro-ops retired). Both events map to IBS execution sampling (IBS op) +with the IBS Op Counter Control bit (IbsOpCntCtl) set respectively (see the Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) section of the [AMD Processor Programming Reference (PPR)] relevant to the family, model and stepping of the processor being used). diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 44140b7f596a..cbc18b22ace5 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -531,6 +531,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) +{ + char *pmu_mapping = env->pmu_mappings, *colon; + + for (int i = 0; i < env->nr_pmu_mappings; ++i) { + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') + goto out_error; + + pmu_mapping = colon + 1; + if (strcmp(pmu_mapping, pmu_name) == 0) + return true; + + pmu_mapping += strlen(pmu_mapping) + 1; + } +out_error: + return false; +} + char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 48d7f8759a2a..94596ff124d5 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -179,4 +179,6 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); + +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index eeb96a1b63a7..1c687b5789c0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2145,6 +2145,14 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } + + if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && + perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + + if (max_precise != NULL && atoi(max_precise) == 0) + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); + } } static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) -- cgit v1.2.3 From ded8c48497b825f15436048e8ea3a731a3f7dece Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:20 -0800 Subject: perf annotate: Pass "-l" option to objdump conditionally The "-l" option is to print line numbers in the objdump output. perf annotate TUI only can show the line numbers later but it causes big slow downs for the kernel binary. Similarly, showing source code also takes a long time and it already has an option to control it. $ time objdump ... -d -S -C vmlinux > /dev/null real 0m3.474s user 0m3.047s sys 0m0.428s $ time objdump ... -d -l -C vmlinux > /dev/null real 0m1.796s user 0m1.459s sys 0m0.338s $ time objdump ... -d -C vmlinux > /dev/null real 0m0.051s user 0m0.036s sys 0m0.016s As it's not needed for data type profiling, let's make it conditional so that it can skip the unnecessary work. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu (Google) Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9b68b8e3791c..118195c787b9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2144,12 +2144,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", opts->show_asm_raw ? "" : "--no-show-raw-insn", opts->annotate_src ? "-S" : "", opts->prefix ? "--prefix " : "", -- cgit v1.2.3 From fb7fd2a14a503b9a23fe10303923fc0605c22288 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:21 -0800 Subject: perf annotate: Move raw_comment and raw_func_start fields out of 'struct ins_operands' Thoese two fields are used only for the jump_ops, so move them into the union to save some bytes. Also add jump__delete() callback not to free the fields as they didn't allocate new strings. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Huacai Chen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu (Google) Cc: Peter Zijlstra Cc: Stephane Eranian Cc: WANG Rui Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/loongarch/annotate/instructions.c | 6 +++--- tools/perf/util/annotate.c | 17 +++++++++++++---- tools/perf/util/annotate.h | 6 ++++-- 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index 98e19c5366ac..21cc7e4149f7 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -61,10 +61,10 @@ static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, st const char *c = strchr(ops->raw, '#'); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) c = NULL; if (c++ != NULL) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 118195c787b9..3364edf30f50 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -340,10 +340,10 @@ bool ins__is_call(const struct ins *ins) */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { - if (ops->raw_comment && c > ops->raw_comment) + if (ops->jump.raw_comment && c > ops->jump.raw_comment) return NULL; - if (ops->raw_func_start && c > ops->raw_func_start) + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) return NULL; return c; @@ -359,8 +359,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s const char *c = strchr(ops->raw, ','); u64 start, end; - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); c = validate_comma(c, ops); @@ -462,7 +462,16 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, ops->target.offset); } +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. + */ +} + static struct ins_ops jump_ops = { + .free = jump__delete, .parse = jump__parse, .scnprintf = jump__scnprintf, }; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index de59c1aff08e..bc8b95e8b1be 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -31,8 +31,6 @@ struct ins { struct ins_operands { char *raw; - char *raw_comment; - char *raw_func_start; struct { char *raw; char *name; @@ -52,6 +50,10 @@ struct ins_operands { struct ins ins; struct ins_operands *ops; } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; }; }; -- cgit v1.2.3 From 6f1b6291cf73cb3223f6fb9ec16862a5fe7ed957 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:22 -0800 Subject: perf tools: Add util/debuginfo.[ch] files Split debuginfo data structure and related functions into a separate file so that it can be used by other components than the probe-finder. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu (Google) Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/debuginfo.c | 205 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/debuginfo.h | 64 +++++++++++++ tools/perf/util/probe-finder.c | 193 +------------------------------------- tools/perf/util/probe-finder.h | 19 +--- 5 files changed, 272 insertions(+), 210 deletions(-) create mode 100644 tools/perf/util/debuginfo.c create mode 100644 tools/perf/util/debuginfo.h (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 96058f949ec9..73e3f194f949 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -195,6 +195,7 @@ endif perf-$(CONFIG_DWARF) += probe-finder.o perf-$(CONFIG_DWARF) += dwarf-aux.o perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_DWARF) += debuginfo.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c new file mode 100644 index 000000000000..19acf4775d35 --- /dev/null +++ b/tools/perf/util/debuginfo.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DWARF debug information handling code. Copied from probe-finder.c. + * + * Written by Masami Hiramatsu + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "build-id.h" +#include "dso.h" +#include "debug.h" +#include "debuginfo.h" +#include "symbol.h" + +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + +/* Dwarf FL wrappers */ +static char *debuginfo_path; /* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + + .section_address = dwfl_offline_section_address, + + /* We use this table for core files too. */ + .find_elf = dwfl_build_id_find_elf, +}; + +/* Get a Dwarf from offline image */ +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, + const char *path) +{ + GElf_Addr dummy; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) + goto error; + + dwfl_report_begin(dbg->dwfl); + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) + goto error; + + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) + goto error; + + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + + dwfl_report_end(dbg->dwfl, NULL, NULL); + + return 0; +error: + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + else + close(fd); + memset(dbg, 0, sizeof(*dbg)); + + return -ENOENT; +} + +static struct debuginfo *__debuginfo__new(const char *path) +{ + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) + return NULL; + + if (debuginfo__init_offline_dwarf(dbg, path) < 0) + zfree(&dbg); + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); + return dbg; +} + +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo *debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + struct build_id bid; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ + if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__put(dso); + +out: + /* if failed to open all distro debuginfo, open given binary */ + return dinfo ? : __debuginfo__new(path); +} + +void debuginfo__delete(struct debuginfo *dbg) +{ + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); + } +} + +/* For the kernel module, we need a special code to get a DIE */ +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; + } + } + return 0; +} + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h new file mode 100644 index 000000000000..4d65b8c605fc --- /dev/null +++ b/tools/perf/util/debuginfo.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PERF_DEBUGINFO_H +#define _PERF_DEBUGINFO_H + +#include +#include + +#ifdef HAVE_DWARF_SUPPORT + +#include "dwarf-aux.h" + +/* debug information structure */ +struct debuginfo { + Dwarf *dbg; + Dwfl_Module *mod; + Dwfl *dwfl; + Dwarf_Addr bias; + const unsigned char *build_id; +}; + +/* This also tries to open distro debuginfo */ +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); + +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + +#else /* HAVE_DWARF_SUPPORT */ + +/* dummy debug information structure */ +struct debuginfo { +}; + +static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) +{ + return NULL; +} + +static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) +{ +} + +static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, + Dwarf_Addr *offs __maybe_unused, + bool adjust_offset __maybe_unused) +{ + return -EINVAL; +} + +#endif /* HAVE_DWARF_SUPPORT */ + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, + char **new_path); +#else /* HAVE_DEBUGINFOD_SUPPORT */ +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ + +#endif /* _PERF_DEBUGINFO_H */ diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f171360b0ef4..8d3dd85f9ff4 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -23,6 +23,7 @@ #include "event.h" #include "dso.h" #include "debug.h" +#include "debuginfo.h" #include "intlist.h" #include "strbuf.h" #include "strlist.h" @@ -31,128 +32,9 @@ #include "probe-file.h" #include "string2.h" -#ifdef HAVE_DEBUGINFOD_SUPPORT -#include -#endif - /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Dwarf FL wrappers */ -static char *debuginfo_path; /* Currently dummy */ - -static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .section_address = dwfl_offline_section_address, - - /* We use this table for core files too. */ - .find_elf = dwfl_build_id_find_elf, -}; - -/* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, - const char *path) -{ - GElf_Addr dummy; - int fd; - - fd = open(path, O_RDONLY); - if (fd < 0) - return fd; - - dbg->dwfl = dwfl_begin(&offline_callbacks); - if (!dbg->dwfl) - goto error; - - dwfl_report_begin(dbg->dwfl); - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); - if (!dbg->mod) - goto error; - - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); - if (!dbg->dbg) - goto error; - - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); - - dwfl_report_end(dbg->dwfl, NULL, NULL); - - return 0; -error: - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - else - close(fd); - memset(dbg, 0, sizeof(*dbg)); - - return -ENOENT; -} - -static struct debuginfo *__debuginfo__new(const char *path) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - if (!dbg) - return NULL; - - if (debuginfo__init_offline_dwarf(dbg, path) < 0) - zfree(&dbg); - if (dbg) - pr_debug("Open Debuginfo file: %s\n", path); - return dbg; -} - -enum dso_binary_type distro_dwarf_types[] = { - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__NOT_FOUND, -}; - -struct debuginfo *debuginfo__new(const char *path) -{ - enum dso_binary_type *type; - char buf[PATH_MAX], nil = '\0'; - struct dso *dso; - struct debuginfo *dinfo = NULL; - struct build_id bid; - - /* Try to open distro debuginfo files */ - dso = dso__new(path); - if (!dso) - goto out; - - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) - dso__set_build_id(dso, &bid); - - for (type = distro_dwarf_types; - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; - type++) { - if (dso__read_binary_type_filename(dso, *type, &nil, - buf, PATH_MAX) < 0) - continue; - dinfo = __debuginfo__new(buf); - } - dso__put(dso); - -out: - /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? : __debuginfo__new(path); -} - -void debuginfo__delete(struct debuginfo *dbg) -{ - if (dbg) { - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - free(dbg); - } -} - /* * Probe finder related functions */ @@ -1677,44 +1559,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? ret : af.nvls; } -/* For the kernel module, we need a special code to get a DIE */ -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset) -{ - int n, i; - Elf32_Word shndx; - Elf_Scn *scn; - Elf *elf; - GElf_Shdr mem, *shdr; - const char *p; - - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); - if (!elf) - return -EINVAL; - - /* Get the number of relocations */ - n = dwfl_module_relocations(dbg->mod); - if (n < 0) - return -ENOENT; - /* Search the relocation related .text section */ - for (i = 0; i < n; i++) { - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); - if (strcmp(p, ".text") == 0) { - /* OK, get the section header */ - scn = elf_getscn(elf, shndx); - if (!scn) - return -ENOENT; - shdr = gelf_getshdr(scn, &mem); - if (!shdr) - return -ENOENT; - *offs = shdr->sh_addr; - if (adjust_offset) - *offs -= shdr->sh_offset; - } - } - return 0; -} - /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) @@ -2009,41 +1853,6 @@ found: return (ret < 0) ? ret : lf.found; } -#ifdef HAVE_DEBUGINFOD_SUPPORT -/* debuginfod doesn't require the comp_dir but buildid is required */ -static int get_source_from_debuginfod(const char *raw_path, - const char *sbuild_id, char **new_path) -{ - debuginfod_client *c = debuginfod_begin(); - const char *p = raw_path; - int fd; - - if (!c) - return -ENOMEM; - - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, - 0, p, new_path); - pr_debug("Search %s from debuginfod -> %d\n", p, fd); - if (fd >= 0) - close(fd); - debuginfod_end(c); - if (fd < 0) { - pr_debug("Failed to find %s in debuginfod (%s)\n", - raw_path, sbuild_id); - return -ENOENT; - } - pr_debug("Got a source %s\n", *new_path); - - return 0; -} -#else -static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, - const char *sbuild_id __maybe_unused, - char **new_path __maybe_unused) -{ - return -ENOTSUP; -} -#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. Result is passed back as diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 8bc1c80d3c1c..3add5ff516e1 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -24,21 +24,7 @@ static inline int is_c_varname(const char *name) #ifdef HAVE_DWARF_SUPPORT #include "dwarf-aux.h" - -/* TODO: export debuginfo data structure even if no dwarf support */ - -/* debug information structure */ -struct debuginfo { - Dwarf *dbg; - Dwfl_Module *mod; - Dwfl *dwfl; - Dwarf_Addr bias; - const unsigned char *build_id; -}; - -/* This also tries to open distro debuginfo */ -struct debuginfo *debuginfo__new(const char *path); -void debuginfo__delete(struct debuginfo *dbg); +#include "debuginfo.h" /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, @@ -49,9 +35,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset); - /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); -- cgit v1.2.3 From a65e8c0b7855e951138eff077af4a6a8721a7ef6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:23 -0800 Subject: perf dwarf-aux: Fix die_get_typename() for void * The die_get_typename() is to return a C-like type name from DWARF debug entry and it follows data type if the target entry is a pointer type. But I found that void pointers don't have the type attribute to follow and then the function returns an error for that case. This results in a broken type string for void pointer types. For example, the following type entries are pointer types. <1><48c>: Abbrev Number: 4 (DW_TAG_pointer_type) <48d> DW_AT_byte_size : 8 <48d> DW_AT_type : <0x481> <1><491>: Abbrev Number: 211 (DW_TAG_pointer_type) <493> DW_AT_byte_size : 8 <1><494>: Abbrev Number: 4 (DW_TAG_pointer_type) <495> DW_AT_byte_size : 8 <495> DW_AT_type : <0x49e> The first one at offset 48c and the third one at offset 494 have type information. Then they are pointer types for the referenced types. But the second one at offset 491 doesn't have the type attribute. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2941d88f2199..4849c3bbfd95 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1090,7 +1090,14 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } ret = die_get_typename(&type, buf); - return ret ? ret : strbuf_addstr(buf, tmp); + if (ret < 0) { + /* void pointer has no type attribute */ + if (tag == DW_TAG_pointer_type && ret == -ENOENT) + return strbuf_addf(buf, "void*"); + + return ret; + } + return strbuf_addstr(buf, tmp); } /** -- cgit v1.2.3 From 3796eba7c137e073d1caa95b48d4a320fd2d9600 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:24 -0800 Subject: perf dwarf-aux: Move #else block of #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT code to the header file It's a usual convention that the conditional code is handled in a header file. As I'm planning to add some more of them, let's move the current code to the header first. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 7 ------- tools/perf/util/dwarf-aux.h | 19 +++++++++++++++++-- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 4849c3bbfd95..adef2635587d 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1245,13 +1245,6 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } -#else -int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) -{ - return -ENOTSUP; -} #endif /* diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ec8bc1083bb..4f5d0211ee4f 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -121,7 +121,6 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); /* Check if target program is compiled with optimization */ bool die_is_optimized_target(Dwarf_Die *cu_die); @@ -130,4 +129,20 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); -#endif +#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT + +/* Get byte offset range of given variable DIE */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} + +#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ + +#endif /* _DWARF_AUX_H */ -- cgit v1.2.3 From 981620fd2776c45a514ed621a718e2a6941ad7c5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:25 -0800 Subject: perf dwarf-aux: Add die_get_scopes() alternative to dwarf_getscopes() The die_get_scopes() returns the number of enclosing DIEs for the given address and it fills an array of DIEs like dwarf_getscopes(). But it doesn't follow the abstract origin of inlined functions as we want information of the concrete instance. This is needed to check the location of parameters and local variables properly. Users can check the origin separately if needed. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dwarf-aux.h | 3 +++ 2 files changed, 56 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index adef2635587d..10aa32334d6f 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1425,3 +1425,56 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, *entrypc = postprologue_addr; } + +/* Internal parameters for __die_find_scope_cb() */ +struct find_scope_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Number of scopes found [output] */ + int nr; + /* Array of scopes found, 0 for the outermost one. [output] */ + Dwarf_Die *scopes; +}; + +static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_scope_data *data = arg; + + if (dwarf_haspc(die_mem, data->pc)) { + Dwarf_Die *tmp; + + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); + if (tmp == NULL) + return DIE_FIND_CB_END; + + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); + data->scopes = tmp; + data->nr++; + return DIE_FIND_CB_CHILD; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_scopes - Return a list of scopes including the address + * @cu_die: a compile unit DIE + * @pc: the address to find + * @scopes: the array of DIEs for scopes (result) + * + * This function does the same as the dwarf_getscopes() but doesn't follow + * the origins of inlined functions. It returns the number of scopes saved + * in the @scopes argument. The outer scope will be saved first (index 0) and + * the last one is the innermost scope at the @pc. + */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) +{ + struct find_scope_data data = { + .pc = pc, + }; + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); + + *scopes = data.scopes; + return data.nr; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 4f5d0211ee4f..f9d765f80fb0 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -129,6 +129,9 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); +/* Get the list of including scopes */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); + #ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /* Get byte offset range of given variable DIE */ -- cgit v1.2.3 From 3f5928e461e394d27bc1ed7d0785b1e63c43d6a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:26 -0800 Subject: perf dwarf-aux: Add die_find_variable_by_reg() helper The die_find_variable_by_reg() will search for a variable or a parameter sub-DIE in the given scope DIE where the location matches to the given register. For the simplest and most common case, memory access usually happens with a base register and an offset to the field so the register holds a pointer in a variable or function parameter. Then we can find one if it has a location expression at the (instruction) address. This function only handles such a simple case for now. In this case, the expression has a DW_OP_regN operation where N < 32. If the register index (N) is greater than or equal to 32, DW_OP_regx operation with an operand which saves the value for the N would be used. It rejects expressions with more operations. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dwarf-aux.h | 12 ++++++++ 2 files changed, 79 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 10aa32334d6f..652e6e7368a2 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1245,6 +1245,73 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } + +/* Interval parameters for __die_find_var_reg_cb() */ +struct find_var_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Target register */ + unsigned reg; +}; + +/* Max number of registers DW_OP_regN supports */ +#define DWARF_OP_DIRECT_REGS 32 + +/* Only checks direct child DIEs in the given scope. */ +static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + /* Assuming the location list is sorted by address */ + if (end < data->pc) + continue; + if (start > data->pc) + break; + + /* Only match with a simple case */ + if (data->reg < DWARF_OP_DIRECT_REGS) { + if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1) + return DIE_FIND_CB_END; + } else { + if (ops->atom == DW_OP_regx && ops->number == data->reg && + nops == 1) + return DIE_FIND_CB_END; + } + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_reg - Find a variable saved in a register + * @sc_die: a scope DIE + * @pc: the program address to find + * @reg: the register number to find + * @die_mem: a buffer to save the resulting DIE + * + * Find the variable DIE accessed by the given register. + */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem) +{ + struct find_var_data data = { + .pc = pc, + .reg = reg, + }; + return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); +} #endif /* diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index f9d765f80fb0..b6f430730bd1 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -137,6 +137,10 @@ int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); /* Get byte offset range of given variable DIE */ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); +/* Find a variable saved in the 'reg' at given address */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + Dwarf_Die *die_mem); + #else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, @@ -146,6 +150,14 @@ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, return -ENOTSUP; } +static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + int reg __maybe_unused, + Dwarf_Die *die_mem __maybe_unused) +{ + return NULL; +} + #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ #endif /* _DWARF_AUX_H */ -- cgit v1.2.3 From f67f2fda7d997a43e3323aec88d76f1b5e0ea5f2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:27 -0800 Subject: perf build: Add feature check for dwarf_getcfi() The dwarf_getcfi() is available on libdw 0.142+. Instead of just checking the version number, it'd be nice to have a config item to check the feature at build time. Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-dwarf_getcfi.c | 9 +++++++++ 3 files changed, 14 insertions(+) create mode 100644 tools/build/feature/test-dwarf_getcfi.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 934e2777a2db..64df118376df 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -32,6 +32,7 @@ FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ dwarf_getlocations \ + dwarf_getcfi \ eventfd \ fortify-source \ get_current_dir_name \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index dad79ede4e0a..37722e509eb9 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -7,6 +7,7 @@ FILES= \ test-bionic.bin \ test-dwarf.bin \ test-dwarf_getlocations.bin \ + test-dwarf_getcfi.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -154,6 +155,9 @@ $(OUTPUT)test-dwarf.bin: $(OUTPUT)test-dwarf_getlocations.bin: $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-dwarf_getcfi.bin: + $(BUILD) $(DWARFLIBS) + $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c new file mode 100644 index 000000000000..50e7d7cb7bdf --- /dev/null +++ b/tools/build/feature/test-dwarf_getcfi.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int main(void) +{ + Dwarf *dwarf = NULL; + return dwarf_getcfi(dwarf) == NULL; +} -- cgit v1.2.3 From c06547d02094e7eb81389e9485a45d91cc21914c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:28 -0800 Subject: perf probe: Convert to check dwarf_getcfi feature Now it has a feature check for the dwarf_getcfi(), use it and convert the code to check HAVE_DWARF_CFI_SUPPORT definition. Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 5 +++++ tools/perf/util/probe-finder.c | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 8b6cffbc4858..aa55850fbc21 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -476,6 +476,11 @@ else else CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT endif # dwarf_getlocations + ifneq ($(feature-dwarf_getcfi), 1) + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142); + else + CFLAGS += -DHAVE_DWARF_CFI_SUPPORT + endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 8d3dd85f9ff4..c8923375e30d 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -604,7 +604,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -615,7 +615,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1140,7 +1140,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->machine = ehdr.e_machine; -#if _ELFUTILS_PREREQ(0, 142) +#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1150,7 +1150,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif +#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; -- cgit v1.2.3 From b8e3a87a627b575896e448021e5c2f8a3bc19931 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 8 Nov 2023 03:23:34 -0800 Subject: bpf: Add crosstask check to __bpf_get_stack Currently get_perf_callchain only supports user stack walking for the current task. Passing the correct *crosstask* param will return 0 frames if the task passed to __bpf_get_stack isn't the current one instead of a single incorrect frame/address. This change passes the correct *crosstask* param but also does a preemptive check in __bpf_get_stack if the task is current and returns -EOPNOTSUPP if it is not. This issue was found using bpf_get_task_stack inside a BPF iterator ("iter/task"), which iterates over all tasks. bpf_get_task_stack works fine for fetching kernel stacks but because get_perf_callchain relies on the caller to know if the requested *task* is the current one (via *crosstask*) it was failing in a confusing way. It might be possible to get user stacks for all tasks utilizing something like access_process_vm but that requires the bpf program calling bpf_get_task_stack to be sleepable and would therefore be a breaking change. Fixes: fa28dcb82a38 ("bpf: Introduce helper bpf_get_task_stack()") Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231108112334.3433136-1-jordalgo@meta.com --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/stackmap.c | 11 ++++++++++- tools/include/uapi/linux/bpf.h | 3 +++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d6b277482085..dff7ba539701 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; + bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; @@ -410,6 +411,14 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; + /* get_perf_callchain does not support crosstask user stack walking + * but returns an empty stack instead of NULL. + */ + if (crosstask && user) { + err = -EOPNOTSUPP; + goto clear; + } + num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) @@ -421,7 +430,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, - false, false); + crosstask, false); if (unlikely(!trace)) goto err_fault; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. -- cgit v1.2.3 From 100888fb6d8a185866b1520031ee7e3182b173de Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 10 Nov 2023 11:36:44 -0800 Subject: selftests/bpf: Fix pyperf180 compilation failure with clang18 With latest clang18 (main branch of llvm-project repo), when building bpf selftests, [~/work/bpf-next (master)]$ make -C tools/testing/selftests/bpf LLVM=1 -j The following compilation error happens: fatal error: error in backend: Branch target out of insn range ... Stack dump: 0. Program arguments: clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -I/home/yhs/work/bpf-next/tools/include/uapi -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -idirafter /home/yhs/work/llvm-project/llvm/build.18/install/lib/clang/18/include -idirafter /usr/local/include -idirafter /usr/include -Wno-compare-distinct-pointer-types -DENABLE_ATOMICS_TESTS -O2 --target=bpf -c progs/pyperf180.c -mcpu=v3 -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/pyperf180.bpf.o 1. parser at end of file 2. Code generation ... The compilation failure only happens to cpu=v2 and cpu=v3. cpu=v4 is okay since cpu=v4 supports 32-bit branch target offset. The above failure is due to upstream llvm patch [1] where some inlining behavior are changed in clang18. To workaround the issue, previously all 180 loop iterations are fully unrolled. The bpf macro __BPF_CPU_VERSION__ (implemented in clang18 recently) is used to avoid unrolling changes if cpu=v4. If __BPF_CPU_VERSION__ is not available and the compiler is clang18, the unrollng amount is unconditionally reduced. [1] https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Alan Maguire Link: https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/progs/pyperf180.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" -- cgit v1.2.3 From 421fc858023b839106a9c0dc42cd8947cf981d83 Mon Sep 17 00:00:00 2001 From: Atul Kumar Pant Date: Mon, 6 Nov 2023 23:40:34 +0530 Subject: selftests: cgroup: Fixes a typo in a comment Signed-off-by: Atul Kumar Pant Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_freezer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index ff519029f6f4..8845353aca53 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -740,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root) /* * cg_check_frozen(cgroup, true) will fail here, - * because the task in in the TRACEd state. + * because the task is in the TRACEd state. */ if (cg_freeze_wait(cgroup, false)) goto cleanup; -- cgit v1.2.3 From 14060dfc481a309e3f236127b0a77abbf249648f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 25 Oct 2023 14:25:53 -0400 Subject: selftests/cgroup: Minor code cleanup and reorganization of test_cpuset_prs.sh Minor cleanup of test matrix and relocation of test_isolated() function to prepare for the next patch. There is no functional change. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 142 +++++++++++----------- 1 file changed, 71 insertions(+), 71 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a6e9848189d6..2b825019f806 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -146,71 +146,6 @@ test_add_proc() echo $$ > $CGROUP2/cgroup.procs # Move out the task } -# -# Testing the new "isolated" partition root type -# -test_isolated() -{ - cd $CGROUP2/test - echo 2-3 > cpuset.cpus - TYPE=$(cat cpuset.cpus.partition) - [[ $TYPE = member ]] || echo member > cpuset.cpus.partition - - console_msg "Change from member to root" - test_partition root - - console_msg "Change from root to isolated" - test_partition isolated - - console_msg "Change from isolated to member" - test_partition member - - console_msg "Change from member to isolated" - test_partition isolated - - console_msg "Change from isolated to root" - test_partition root - - console_msg "Change from root to member" - test_partition member - - # - # Testing partition root with no cpu - # - console_msg "Distribute all cpus to child partition" - echo +cpuset > cgroup.subtree_control - test_partition root - - mkdir A1 - cd A1 - echo 2-3 > cpuset.cpus - test_partition root - test_effective_cpus 2-3 - cd .. - test_effective_cpus "" - - console_msg "Moving task to partition test" - test_add_proc "No space left" - cd A1 - test_add_proc "" - cd .. - - console_msg "Shrink and expand child partition" - cd A1 - echo 2 > cpuset.cpus - cd .. - test_effective_cpus 3 - cd A1 - echo 2-3 > cpuset.cpus - cd .. - test_effective_cpus "" - - # Cleaning up - console_msg "Cleaning up" - echo $$ > $CGROUP2/cgroup.procs - [[ -d A1 ]] && rmdir A1 -} - # # Cpuset controller state transition test matrix. # @@ -304,7 +239,7 @@ TEST_MATRIX=( A1:P0,A2:P2,A3:P1 2-4" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1 ." + A1:P0,A2:P-2,A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ A1:P0,A2:P2,A3:P-1 2-4" @@ -347,10 +282,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" @@ -359,13 +294,13 @@ TEST_MATRIX=( A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -804,6 +739,71 @@ run_state_test() echo "All $I tests of $TEST PASSED." } +# +# Testing the new "isolated" partition root type +# +test_isolated() +{ + cd $CGROUP2/test + echo 2-3 > cpuset.cpus + TYPE=$(cat cpuset.cpus.partition) + [[ $TYPE = member ]] || echo member > cpuset.cpus.partition + + console_msg "Change from member to root" + test_partition root + + console_msg "Change from root to isolated" + test_partition isolated + + console_msg "Change from isolated to member" + test_partition member + + console_msg "Change from member to isolated" + test_partition isolated + + console_msg "Change from isolated to root" + test_partition root + + console_msg "Change from root to member" + test_partition member + + # + # Testing partition root with no cpu + # + console_msg "Distribute all cpus to child partition" + echo +cpuset > cgroup.subtree_control + test_partition root + + mkdir A1 + cd A1 + echo 2-3 > cpuset.cpus + test_partition root + test_effective_cpus 2-3 + cd .. + test_effective_cpus "" + + console_msg "Moving task to partition test" + test_add_proc "No space left" + cd A1 + test_add_proc "" + cd .. + + console_msg "Shrink and expand child partition" + cd A1 + echo 2 > cpuset.cpus + cd .. + test_effective_cpus 3 + cd A1 + echo 2-3 > cpuset.cpus + cd .. + test_effective_cpus "" + + # Cleaning up + console_msg "Cleaning up" + echo $$ > $CGROUP2/cgroup.procs + [[ -d A1 ]] && rmdir A1 +} + # # Wait for inotify event for the given file and read it # $1: cgroup file to wait for -- cgit v1.2.3 From 72c6303acfa1008c542e093bc9f9916fb99e0323 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 25 Oct 2023 14:25:55 -0400 Subject: cgroup/cpuset: Take isolated CPUs out of workqueue unbound cpumask To make CPUs in isolated cpuset partition closer in isolation to the boot time isolated CPUs specified in the "isolcpus" boot command line option, we need to take those CPUs out of the workqueue unbound cpumask so that work functions from the unbound workqueues won't run on those CPUs. Otherwise, they will interfere the user tasks running on those isolated CPUs. With the introduction of the workqueue_unbound_exclude_cpumask() helper function in an earlier commit, those isolated CPUs can now be taken out from the workqueue unbound cpumask. This patch also updates cgroup-v2.rst to mention that isolated CPUs will be excluded from unbound workqueue cpumask as well as updating test_cpuset_prs.sh to verify the correctness of the new *cpuset.cpus.isolated file, if available via cgroup_debug option. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 10 +- kernel/cgroup/cpuset.c | 116 ++++++++++++++++++---- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 74 ++++++++++++-- 3 files changed, 166 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 3f85254f3cef..cf5651a11df8 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2358,11 +2358,11 @@ Cpuset Interface Files partition or scheduling domain. The set of exclusive CPUs is determined by the value of its "cpuset.cpus.exclusive.effective". - When set to "isolated", the CPUs in that partition will - be in an isolated state without any load balancing from the - scheduler. Tasks placed in such a partition with multiple - CPUs should be carefully distributed and bound to each of the - individual CPUs for optimal performance. + When set to "isolated", the CPUs in that partition will be in + an isolated state without any load balancing from the scheduler + and excluded from the unbound workqueues. Tasks placed in such + a partition with multiple CPUs should be carefully distributed + and bound to each of the individual CPUs for optimal performance. A partition root ("root" or "isolated") can be in one of the two possible states - valid or invalid. An invalid partition diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 19c8779798fd..1bad4007ff4b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ #include #include #include +#include DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key); DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key); @@ -1444,25 +1446,31 @@ static void partition_xcpus_newstate(int old_prs, int new_prs, struct cpumask *x * @new_prs: new partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be added + * Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ -static void partition_xcpus_add(int new_prs, struct cpuset *parent, +static bool partition_xcpus_add(int new_prs, struct cpuset *parent, struct cpumask *xcpus) { + bool isolcpus_updated; + WARN_ON_ONCE(new_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) parent = &top_cpuset; + if (parent == &top_cpuset) cpumask_or(subpartitions_cpus, subpartitions_cpus, xcpus); - if (new_prs != parent->partition_root_state) + isolcpus_updated = (new_prs != parent->partition_root_state); + if (isolcpus_updated) partition_xcpus_newstate(parent->partition_root_state, new_prs, xcpus); cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); + return isolcpus_updated; } /* @@ -1470,12 +1478,15 @@ static void partition_xcpus_add(int new_prs, struct cpuset *parent, * @old_prs: old partition_root_state * @parent: parent cpuset * @xcpus: exclusive CPUs to be removed + * Return: true if isolated_cpus modified, false otherwise * * Remote partition if parent == NULL */ -static void partition_xcpus_del(int old_prs, struct cpuset *parent, +static bool partition_xcpus_del(int old_prs, struct cpuset *parent, struct cpumask *xcpus) { + bool isolcpus_updated; + WARN_ON_ONCE(old_prs < 0); lockdep_assert_held(&callback_lock); if (!parent) @@ -1484,12 +1495,27 @@ static void partition_xcpus_del(int old_prs, struct cpuset *parent, if (parent == &top_cpuset) cpumask_andnot(subpartitions_cpus, subpartitions_cpus, xcpus); - if (old_prs != parent->partition_root_state) + isolcpus_updated = (old_prs != parent->partition_root_state); + if (isolcpus_updated) partition_xcpus_newstate(old_prs, parent->partition_root_state, xcpus); cpumask_and(xcpus, xcpus, cpu_active_mask); cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); + return isolcpus_updated; +} + +static void update_unbound_workqueue_cpumask(bool isolcpus_updated) +{ + int ret; + + lockdep_assert_cpus_held(); + + if (!isolcpus_updated) + return; + + ret = workqueue_unbound_exclude_cpumask(isolated_cpus); + WARN_ON_ONCE(ret < 0); } /* @@ -1540,6 +1566,8 @@ static inline bool is_local_partition(struct cpuset *cs) static int remote_partition_enable(struct cpuset *cs, int new_prs, struct tmpmasks *tmp) { + bool isolcpus_updated; + /* * The user must have sysadmin privilege. */ @@ -1561,7 +1589,7 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, return 0; spin_lock_irq(&callback_lock); - partition_xcpus_add(new_prs, NULL, tmp->new_cpus); + isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); list_add(&cs->remote_sibling, &remote_children); if (cs->use_parent_ecpus) { struct cpuset *parent = parent_cs(cs); @@ -1570,13 +1598,13 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, parent->child_ecpus_count--; } spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. */ update_tasks_cpumask(&top_cpuset, tmp->new_cpus); update_sibling_cpumasks(&top_cpuset, NULL, tmp); - return 1; } @@ -1591,18 +1619,22 @@ static int remote_partition_enable(struct cpuset *cs, int new_prs, */ static void remote_partition_disable(struct cpuset *cs, struct tmpmasks *tmp) { + bool isolcpus_updated; + compute_effective_exclusive_cpumask(cs, tmp->new_cpus); WARN_ON_ONCE(!is_remote_partition(cs)); WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); spin_lock_irq(&callback_lock); list_del_init(&cs->remote_sibling); - partition_xcpus_del(cs->partition_root_state, NULL, tmp->new_cpus); + isolcpus_updated = partition_xcpus_del(cs->partition_root_state, + NULL, tmp->new_cpus); cs->partition_root_state = -cs->partition_root_state; if (!cs->prs_err) cs->prs_err = PERR_INVCPUS; reset_partition_data(cs); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1625,6 +1657,7 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, { bool adding, deleting; int prs = cs->partition_root_state; + int isolcpus_updated = 0; if (WARN_ON_ONCE(!is_remote_partition(cs))) return; @@ -1649,10 +1682,11 @@ static void remote_cpus_update(struct cpuset *cs, struct cpumask *newmask, spin_lock_irq(&callback_lock); if (adding) - partition_xcpus_add(prs, NULL, tmp->addmask); + isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); if (deleting) - partition_xcpus_del(prs, NULL, tmp->delmask); + isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); /* * Proprogate changes in top_cpuset's effective_cpus down the hierarchy. @@ -1774,6 +1808,7 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, int part_error = PERR_NONE; /* Partition error? */ int subparts_delta = 0; struct cpumask *xcpus; /* cs effective_xcpus */ + int isolcpus_updated = 0; bool nocpu; lockdep_assert_held(&cpuset_mutex); @@ -2010,15 +2045,18 @@ write_error: * and vice versa. */ if (adding) - partition_xcpus_del(old_prs, parent, tmp->addmask); + isolcpus_updated += partition_xcpus_del(old_prs, parent, + tmp->addmask); if (deleting) - partition_xcpus_add(new_prs, parent, tmp->delmask); + isolcpus_updated += partition_xcpus_add(new_prs, parent, + tmp->delmask); if (is_partition_valid(parent)) { parent->nr_subparts += subparts_delta; WARN_ON_ONCE(parent->nr_subparts < 0); } spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(isolcpus_updated); if ((old_prs != new_prs) && (cmd == partcmd_update)) update_partition_exclusive(cs, new_prs); @@ -3082,6 +3120,7 @@ out: else if (new_xcpus_state) partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); spin_unlock_irq(&callback_lock); + update_unbound_workqueue_cpumask(new_xcpus_state); /* Force update if switching back to member */ update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); @@ -4370,6 +4409,30 @@ void cpuset_force_rebuild(void) force_rebuild = true; } +/* + * Attempt to acquire a cpus_read_lock while a hotplug operation may be in + * progress. + * Return: true if successful, false otherwise + * + * To avoid circular lock dependency between cpuset_mutex and cpus_read_lock, + * cpus_read_trylock() is used here to acquire the lock. + */ +static bool cpuset_hotplug_cpus_read_trylock(void) +{ + int retries = 0; + + while (!cpus_read_trylock()) { + /* + * CPU hotplug still in progress. Retry 5 times + * with a 10ms wait before bailing out. + */ + if (++retries > 5) + return false; + msleep(10); + } + return true; +} + /** * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug * @cs: cpuset in interest @@ -4386,6 +4449,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) bool cpus_updated; bool mems_updated; bool remote; + int partcmd = -1; struct cpuset *parent; retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -4417,11 +4481,13 @@ retry: compute_partition_effective_cpumask(cs, &new_cpus); if (remote && cpumask_empty(&new_cpus) && - partition_is_populated(cs, NULL)) { + partition_is_populated(cs, NULL) && + cpuset_hotplug_cpus_read_trylock()) { remote_partition_disable(cs, tmp); compute_effective_cpumask(&new_cpus, cs, parent); remote = false; cpuset_force_rebuild(); + cpus_read_unlock(); } /* @@ -4432,18 +4498,28 @@ retry: * partitions. */ if (is_local_partition(cs) && (!is_partition_valid(parent) || - tasks_nocpu_error(parent, cs, &new_cpus))) { - update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, tmp); - compute_effective_cpumask(&new_cpus, cs, parent); - cpuset_force_rebuild(); - } + tasks_nocpu_error(parent, cs, &new_cpus))) + partcmd = partcmd_invalidate; /* * On the other hand, an invalid partition root may be transitioned * back to a regular one. */ - else if (is_partition_valid(parent) && is_partition_invalid(cs)) { - update_parent_effective_cpumask(cs, partcmd_update, NULL, tmp); - if (is_partition_valid(cs)) { + else if (is_partition_valid(parent) && is_partition_invalid(cs)) + partcmd = partcmd_update; + + /* + * cpus_read_lock needs to be held before calling + * update_parent_effective_cpumask(). To avoid circular lock + * dependency between cpuset_mutex and cpus_read_lock, + * cpus_read_trylock() is used here to acquire the lock. + */ + if (partcmd >= 0) { + if (!cpuset_hotplug_cpus_read_trylock()) + goto update_tasks; + + update_parent_effective_cpumask(cs, partcmd, NULL, tmp); + cpus_read_unlock(); + if ((partcmd == partcmd_invalidate) || is_partition_valid(cs)) { compute_partition_effective_cpumask(cs, &new_cpus); cpuset_force_rebuild(); } diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 2b825019f806..7b7c4c2b6d85 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -232,11 +232,11 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4" + A1:P0,A2:P2,A3:P1 2-4,2-3" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ A1:P0,A2:P-2,A3:P-1" @@ -248,7 +248,7 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," # An invalidated remote partition cannot self-recover from hotplug " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" @@ -376,7 +376,7 @@ write_cpu_online() } fi echo $VAL > $CPUFILE - pause 0.01 + pause 0.05 } # @@ -508,12 +508,14 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions + ISCPUS=$DIR/.__DEBUG__.cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)" [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)" + [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)" done } @@ -591,11 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and compare that with the expected value. +# /sys/kernel/debug/sched/domains and *cpuset.cpus.isolated control file, +# if available, and compare that with the expected value. # -# Note that a sched domain of just 1 CPU will be considered isolated. +# Note that isolated CPUs from the sched/domains context include offline +# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may +# not be included in the *cpuset.cpus.isolated control file which contains +# only CPUs in isolated partitions. # -# $1 - expected isolated cpu list +# $1 - expected isolated cpu list(s) {,} +# - expected sched/domains value +# - *cpuset.cpus.isolated value = if not defined # check_isolcpus() { @@ -603,8 +611,38 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains + ISCPUS=${CGROUP2}/.__DEBUG__.cpuset.cpus.isolated + if [[ $EXPECT_VAL = . ]] + then + EXPECT_VAL= + EXPECT_VAL2= + elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + then + set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + EXPECT_VAL=$1 + EXPECT_VAL2=$2 + else + EXPECT_VAL2=$EXPECT_VAL + fi + + # + # Check the debug isolated cpumask, if present + # + [[ -f $ISCPUS ]] && { + ISOLCPUS=$(cat $ISCPUS) + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + # Take a 50ms pause and try again + pause 0.05 + ISOLCPUS=$(cat $ISCPUS) + } + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + ISOLCPUS= + } + + # + # Use the sched domain in debugfs to check isolated CPUs, if available + # [[ -d $SCHED_DOMAINS ]] || return 0 - [[ $EXPECT_VAL = . ]] && EXPECT_VAL= for ((CPU=0; CPU < $NR_CPUS; CPU++)) do @@ -648,6 +686,22 @@ test_fail() exit 1 } +# +# Check to see if there are unexpected isolated CPUs left +# +null_isolcpus_check() +{ + [[ $VERBOSE -gt 0 ]] || return 0 + pause 0.02 + check_isolcpus "." + if [[ $? -ne 0 ]] + then + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 + fi +} + # # Run cpuset state transition test # $1 - test matrix name @@ -733,6 +787,7 @@ run_state_test() echo "Effective cpus changed to $NEWLIST after test $I!" exit 1 } + null_isolcpus_check [[ $VERBOSE -gt 0 ]] && echo "Test $I done." ((I++)) done @@ -802,6 +857,7 @@ test_isolated() console_msg "Cleaning up" echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 + null_isolcpus_check } # -- cgit v1.2.3 From 5f42375904b08890f2e8e7cd955c5bf0c2c0d05a Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:51 -0700 Subject: LSM: wireup Linux Security Module syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wireup lsm_get_self_attr, lsm_set_self_attr and lsm_list_modules system calls. Signed-off-by: Casey Schaufler Reviewed-by: Kees Cook Acked-by: Geert Uytterhoeven Acked-by: Arnd Bergmann Cc: linux-api@vger.kernel.org Reviewed-by: Mickaël Salaün [PM: forward ported beyond v6.6 due merge window changes] Signed-off-by: Paul Moore --- arch/alpha/kernel/syscalls/syscall.tbl | 3 +++ arch/arm/tools/syscall.tbl | 3 +++ arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 6 ++++++ arch/m68k/kernel/syscalls/syscall.tbl | 3 +++ arch/microblaze/kernel/syscalls/syscall.tbl | 3 +++ arch/mips/kernel/syscalls/syscall_n32.tbl | 3 +++ arch/mips/kernel/syscalls/syscall_n64.tbl | 3 +++ arch/mips/kernel/syscalls/syscall_o32.tbl | 3 +++ arch/parisc/kernel/syscalls/syscall.tbl | 3 +++ arch/powerpc/kernel/syscalls/syscall.tbl | 3 +++ arch/s390/kernel/syscalls/syscall.tbl | 3 +++ arch/sh/kernel/syscalls/syscall.tbl | 3 +++ arch/sparc/kernel/syscalls/syscall.tbl | 3 +++ arch/x86/entry/syscalls/syscall_32.tbl | 3 +++ arch/x86/entry/syscalls/syscall_64.tbl | 3 +++ arch/xtensa/kernel/syscalls/syscall.tbl | 3 +++ include/uapi/asm-generic/unistd.h | 9 ++++++++- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 3 +++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 3 +++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 3 +++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 3 +++ 22 files changed, 72 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 18c842ca6c32..b04af0c9fcbc 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -496,3 +496,6 @@ 564 common futex_wake sys_futex_wake 565 common futex_wait sys_futex_wait 566 common futex_requeue sys_futex_requeue +567 common lsm_get_self_attr sys_lsm_get_self_attr +568 common lsm_set_self_attr sys_lsm_set_self_attr +569 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 584f9528c996..43313beefae7 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -470,3 +470,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 531effca5f1f..abe10a833fcd 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 457 +#define __NR_compat_syscalls 460 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 9f7c1bf99526..ab1a7c2b6653 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -919,6 +919,12 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake) __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_lsm_get_self_attr 457 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 458 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 459 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) /* * Please add new compat syscalls above this comment and update diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7a4b780e82cb..90629ffc6732 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -456,3 +456,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 5b6a0b02b7de..c395dece73b4 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -462,3 +462,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index a842b41c8e06..4a876c4e77d6 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -395,3 +395,6 @@ 454 n32 futex_wake sys_futex_wake 455 n32 futex_wait sys_futex_wait 456 n32 futex_requeue sys_futex_requeue +457 n32 lsm_get_self_attr sys_lsm_get_self_attr +458 n32 lsm_set_self_attr sys_lsm_set_self_attr +459 n32 lsm_list_modules sys_lsm_list_modules diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 116ff501bf92..b74c8571f063 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -371,3 +371,6 @@ 454 n64 futex_wake sys_futex_wake 455 n64 futex_wait sys_futex_wait 456 n64 futex_requeue sys_futex_requeue +457 n64 lsm_get_self_attr sys_lsm_get_self_attr +458 n64 lsm_set_self_attr sys_lsm_set_self_attr +459 n64 lsm_list_modules sys_lsm_list_modules diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 525cc54bc63b..bf41906e1f68 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -444,3 +444,6 @@ 454 o32 futex_wake sys_futex_wake 455 o32 futex_wait sys_futex_wait 456 o32 futex_requeue sys_futex_requeue +457 o32 lsm_get_self_attr sys_lsm_get_self_attr +458 032 lsm_set_self_attr sys_lsm_set_self_attr +459 o32 lsm_list_modules sys_lsm_list_modules diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index a47798fed54e..ccc0a679e774 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -455,3 +455,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7fab411378f2..a6f37e2333cb 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -543,3 +543,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 86fec9b080f6..4b818e9ee832 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -459,3 +459,6 @@ 454 common futex_wake sys_futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 363fae0fe9bf..1a3d88d1a07f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -459,3 +459,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 7bcaa3d5ea44..e0e8cec62358 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -502,3 +502,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index c8fac5205803..6e45e693f339 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -461,3 +461,6 @@ 454 i386 futex_wake sys_futex_wake 455 i386 futex_wait sys_futex_wait 456 i386 futex_requeue sys_futex_requeue +457 i386 lsm_get_self_attr sys_lsm_get_self_attr +458 i386 lsm_set_self_attr sys_lsm_set_self_attr +459 i386 lsm_list_modules sys_lsm_list_modules diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 8cb8bf68721c..d3b41d059d4d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -378,6 +378,9 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 06eefa9c1458..284784ea5a46 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -427,3 +427,6 @@ 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait 456 common futex_requeue sys_futex_requeue +457 common lsm_get_self_attr sys_lsm_get_self_attr +458 common lsm_set_self_attr sys_lsm_set_self_attr +459 common lsm_list_modules sys_lsm_list_modules diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 756b013fb832..55cc0bcfb58d 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -829,8 +829,15 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_lsm_get_self_attr 457 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 458 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 459 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 460 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 80be0e98ea0c..81c772c0f5c8 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -367,3 +367,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +453 n64 lsm_get_self_attr sys_lsm_get_self_attr +454 n64 lsm_set_self_attr sys_lsm_set_self_attr +455 n64 lsm_list_modules sys_lsm_list_modules diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index e1412519b4ad..861c6ca0a8c3 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -539,3 +539,6 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +453 common lsm_get_self_attr sys_lsm_get_self_attr +454 common lsm_set_self_attr sys_lsm_set_self_attr +455 common lsm_list_modules sys_lsm_list_modules diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index cc0bc144b661..5a422443cb16 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -455,3 +455,6 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +453 common lsm_get_self_attr sys_lsm_get_self_attr sys_lsm_get_self_attr +454 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr +455 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 2a62eaf30d69..e692c88105a6 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,9 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common lsm_get_self_attr sys_lsm_get_self_attr +455 common lsm_set_self_attr sys_lsm_set_self_attr +456 common lsm_list_modules sys_lsm_list_modules # # Due to a historical design error, certain syscalls are numbered differently -- cgit v1.2.3 From d3d929a8b0cd6deb7d70d1d8d805bccee3fbf11f Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 12 Sep 2023 13:56:56 -0700 Subject: LSM: selftests for Linux Security Module syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add selftests for the three system calls supporting the LSM infrastructure. This set of tests is limited by the differences in access policy enforced by the existing security modules. Signed-off-by: Casey Schaufler Reviewed-by: Mickaël Salaün Tested-by: Mickaël Salaün Signed-off-by: Paul Moore --- MAINTAINERS | 1 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/lsm/.gitignore | 1 + tools/testing/selftests/lsm/Makefile | 17 ++ tools/testing/selftests/lsm/common.c | 89 +++++++ tools/testing/selftests/lsm/common.h | 33 +++ tools/testing/selftests/lsm/config | 3 + .../testing/selftests/lsm/lsm_get_self_attr_test.c | 275 +++++++++++++++++++++ .../testing/selftests/lsm/lsm_list_modules_test.c | 140 +++++++++++ .../testing/selftests/lsm/lsm_set_self_attr_test.c | 74 ++++++ 10 files changed, 634 insertions(+) create mode 100644 tools/testing/selftests/lsm/.gitignore create mode 100644 tools/testing/selftests/lsm/Makefile create mode 100644 tools/testing/selftests/lsm/common.c create mode 100644 tools/testing/selftests/lsm/common.h create mode 100644 tools/testing/selftests/lsm/config create mode 100644 tools/testing/selftests/lsm/lsm_get_self_attr_test.c create mode 100644 tools/testing/selftests/lsm/lsm_list_modules_test.c create mode 100644 tools/testing/selftests/lsm/lsm_set_self_attr_test.c (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index f1d41fd9159a..2482b40fd786 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19513,6 +19513,7 @@ W: http://kernsec.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/lsm.git F: include/uapi/linux/lsm.h F: security/ +F: tools/testing/selftests/lsm/ X: security/selinux/ SELINUX SECURITY MODULE diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..1107be84ea95 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -43,6 +43,7 @@ TARGETS += landlock TARGETS += lib TARGETS += livepatch TARGETS += lkdtm +TARGETS += lsm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug diff --git a/tools/testing/selftests/lsm/.gitignore b/tools/testing/selftests/lsm/.gitignore new file mode 100644 index 000000000000..bd68f6c3fd07 --- /dev/null +++ b/tools/testing/selftests/lsm/.gitignore @@ -0,0 +1 @@ +/*_test diff --git a/tools/testing/selftests/lsm/Makefile b/tools/testing/selftests/lsm/Makefile new file mode 100644 index 000000000000..3f80c0bc093d --- /dev/null +++ b/tools/testing/selftests/lsm/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# First run: make -C ../../../.. headers_install + +CFLAGS += -Wall -O2 $(KHDR_INCLUDES) +LOCAL_HDRS += common.h + +TEST_GEN_PROGS := lsm_get_self_attr_test lsm_list_modules_test \ + lsm_set_self_attr_test + +include ../lib.mk + +$(OUTPUT)/lsm_get_self_attr_test: lsm_get_self_attr_test.c common.c +$(OUTPUT)/lsm_set_self_attr_test: lsm_set_self_attr_test.c common.c +$(OUTPUT)/lsm_list_modules_test: lsm_list_modules_test.c common.c + +EXTRA_CLEAN = $(OUTPUT)/common.o diff --git a/tools/testing/selftests/lsm/common.c b/tools/testing/selftests/lsm/common.c new file mode 100644 index 000000000000..9ad258912646 --- /dev/null +++ b/tools/testing/selftests/lsm/common.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "common.h" + +#define PROCATTR "/proc/self/attr/" + +int read_proc_attr(const char *attr, char *value, size_t size) +{ + int fd; + int len; + char *path; + + len = strlen(PROCATTR) + strlen(attr) + 1; + path = calloc(len, 1); + if (path == NULL) + return -1; + sprintf(path, "%s%s", PROCATTR, attr); + + fd = open(path, O_RDONLY); + free(path); + + if (fd < 0) + return -1; + len = read(fd, value, size); + + close(fd); + + /* Ensure value is terminated */ + if (len <= 0 || len == size) + return -1; + value[len] = '\0'; + + path = strchr(value, '\n'); + if (path) + *path = '\0'; + + return 0; +} + +int read_sysfs_lsms(char *lsms, size_t size) +{ + FILE *fp; + size_t red; + + fp = fopen("/sys/kernel/security/lsm", "r"); + if (fp == NULL) + return -1; + red = fread(lsms, 1, size, fp); + fclose(fp); + + if (red <= 0 || red == size) + return -1; + lsms[red] = '\0'; + return 0; +} + +int attr_lsm_count(void) +{ + char *names = calloc(sysconf(_SC_PAGESIZE), 1); + int count = 0; + + if (!names) + return 0; + + if (read_sysfs_lsms(names, sysconf(_SC_PAGESIZE))) + return 0; + + if (strstr(names, "selinux")) + count++; + if (strstr(names, "smack")) + count++; + if (strstr(names, "apparmor")) + count++; + + return count; +} diff --git a/tools/testing/selftests/lsm/common.h b/tools/testing/selftests/lsm/common.h new file mode 100644 index 000000000000..d404329e5eeb --- /dev/null +++ b/tools/testing/selftests/lsm/common.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler + */ + +#ifndef lsm_get_self_attr +static inline int lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t *size, __u32 flags) +{ + return syscall(__NR_lsm_get_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_set_self_attr +static inline int lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + size_t size, __u32 flags) +{ + return syscall(__NR_lsm_set_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_list_modules +static inline int lsm_list_modules(__u64 *ids, size_t *size, __u32 flags) +{ + return syscall(__NR_lsm_list_modules, ids, size, flags); +} +#endif + +extern int read_proc_attr(const char *attr, char *value, size_t size); +extern int read_sysfs_lsms(char *lsms, size_t size); +int attr_lsm_count(void); diff --git a/tools/testing/selftests/lsm/config b/tools/testing/selftests/lsm/config new file mode 100644 index 000000000000..1c0c4c020f9c --- /dev/null +++ b/tools/testing/selftests/lsm/config @@ -0,0 +1,3 @@ +CONFIG_SYSFS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c new file mode 100644 index 000000000000..e0e313d9047a --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_get_self_attr system call + * + * Copyright © 2022 Casey Schaufler + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include "../kselftest_harness.h" +#include "common.h" + +static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp) +{ + void *vp; + + vp = (void *)ctxp + sizeof(*ctxp) + ctxp->ctx_len; + return (struct lsm_ctx *)vp; +} + +TEST(size_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, NULL, 0)); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(ctx_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + int rc; + + rc = lsm_get_self_attr(LSM_ATTR_CURRENT, NULL, &size, 0); + + if (attr_lsm_count()) { + ASSERT_NE(-1, rc); + ASSERT_NE(1, size); + } else { + ASSERT_EQ(-1, rc); + } +} + +TEST(size_too_small_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = 1; + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); + if (attr_lsm_count()) { + ASSERT_EQ(E2BIG, errno); + } else { + ASSERT_EQ(EOPNOTSUPP, errno); + } + ASSERT_NE(1, size); + + free(ctx); +} + +TEST(flags_zero_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size; + int lsmcount; + int i; + + ASSERT_NE(NULL, ctx); + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + ASSERT_NE(NULL, syscall_lsms); + + for (i = 0; i < lsmcount; i++) { + errno = 0; + size = page_size; + ctx->id = syscall_lsms[i]; + + if (syscall_lsms[i] == LSM_ID_SELINUX || + syscall_lsms[i] == LSM_ID_SMACK || + syscall_lsms[i] == LSM_ID_APPARMOR) { + ASSERT_EQ(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, LSM_FLAG_SINGLE)); + } else { + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, + LSM_FLAG_SINGLE)); + } + } + + free(ctx); +} + +TEST(flags_overset_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size; + + ASSERT_NE(NULL, ctx); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, + &size, 0)); + ASSERT_EQ(EOPNOTSUPP, errno); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE | + (LSM_FLAG_SINGLE << 1))); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(basic_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + struct lsm_ctx *ctx = calloc(page_size, 1); + struct lsm_ctx *tctx = NULL; + __u64 *syscall_lsms = calloc(page_size, 1); + char *attr = calloc(page_size, 1); + int cnt_current = 0; + int cnt_exec = 0; + int cnt_fscreate = 0; + int cnt_keycreate = 0; + int cnt_prev = 0; + int cnt_sockcreate = 0; + int lsmcount; + int count; + int i; + + ASSERT_NE(NULL, ctx); + ASSERT_NE(NULL, syscall_lsms); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + + for (i = 0; i < lsmcount; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_SELINUX: + cnt_current++; + cnt_exec++; + cnt_fscreate++; + cnt_keycreate++; + cnt_prev++; + cnt_sockcreate++; + break; + case LSM_ID_SMACK: + cnt_current++; + break; + case LSM_ID_APPARMOR: + cnt_current++; + cnt_exec++; + cnt_prev++; + break; + default: + break; + } + } + + if (cnt_current) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0); + ASSERT_EQ(cnt_current, count); + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("current", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_exec) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_EXEC, ctx, &size, 0); + ASSERT_GE(cnt_exec, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("exec", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_fscreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_FSCREATE, ctx, &size, 0); + ASSERT_GE(cnt_fscreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("fscreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_keycreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_KEYCREATE, ctx, &size, 0); + ASSERT_GE(cnt_keycreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("keycreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_prev) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_PREV, ctx, &size, 0); + ASSERT_GE(cnt_prev, count); + if (count > 0) { + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("prev", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + } + if (cnt_sockcreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_SOCKCREATE, ctx, &size, 0); + ASSERT_GE(cnt_sockcreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("sockcreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + + free(ctx); + free(attr); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c new file mode 100644 index 000000000000..445c02f09c74 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_list_modules system call + * + * Copyright © 2022 Casey Schaufler + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "../kselftest_harness.h" +#include "common.h" + +TEST(size_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, NULL, 0)); + ASSERT_EQ(EFAULT, errno); + + free(syscall_lsms); +} + +TEST(ids_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(NULL, &size, 0)); + ASSERT_EQ(EFAULT, errno); + ASSERT_NE(1, size); +} + +TEST(size_too_small_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size = 1; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 0)); + ASSERT_EQ(E2BIG, errno); + ASSERT_NE(1, size); + + free(syscall_lsms); +} + +TEST(flags_set_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 7)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + free(syscall_lsms); +} + +TEST(correct_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + size_t size = page_size; + __u64 *syscall_lsms = calloc(page_size, 1); + char *sysfs_lsms = calloc(page_size, 1); + char *name; + char *cp; + int count; + int i; + + ASSERT_NE(NULL, sysfs_lsms); + ASSERT_NE(NULL, syscall_lsms); + ASSERT_EQ(0, read_sysfs_lsms(sysfs_lsms, page_size)); + + count = lsm_list_modules(syscall_lsms, &size, 0); + ASSERT_LE(1, count); + cp = sysfs_lsms; + for (i = 0; i < count; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_CAPABILITY: + name = "capability"; + break; + case LSM_ID_SELINUX: + name = "selinux"; + break; + case LSM_ID_SMACK: + name = "smack"; + break; + case LSM_ID_TOMOYO: + name = "tomoyo"; + break; + case LSM_ID_IMA: + name = "ima"; + break; + case LSM_ID_APPARMOR: + name = "apparmor"; + break; + case LSM_ID_YAMA: + name = "yama"; + break; + case LSM_ID_LOADPIN: + name = "loadpin"; + break; + case LSM_ID_SAFESETID: + name = "safesetid"; + break; + case LSM_ID_LOCKDOWN: + name = "lockdown"; + break; + case LSM_ID_BPF: + name = "bpf"; + break; + case LSM_ID_LANDLOCK: + name = "landlock"; + break; + default: + name = "INVALID"; + break; + } + ASSERT_EQ(0, strncmp(cp, name, strlen(name))); + cp += strlen(name) + 1; + } + + free(sysfs_lsms); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c new file mode 100644 index 000000000000..e9712c6cf596 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_set_self_attr system call + * + * Copyright © 2022 Casey Schaufler + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "../kselftest_harness.h" +#include "common.h" + +TEST(ctx_null_lsm_set_self_attr) +{ + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, NULL, + sizeof(struct lsm_ctx), 0)); +} + +TEST(size_too_small_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, 1, 0)); + + free(ctx); +} + +TEST(flags_zero_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + size_t size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, size, 1)); + + free(ctx); +} + +TEST(flags_overset_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + char *ctx = calloc(page_size, 1); + size_t size = page_size; + struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + size, 0)); + + free(ctx); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 89b212d4afef64331b08c44e661a703d2be0970b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 13 Nov 2023 21:16:51 +0100 Subject: selftests/nolibc: don't hang on config input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the kernel code has changed the build may ask for configuration input and hang. Prevent this and instead use the default settings. Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index a0fc07253baf..6c7040a75d81 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -210,10 +210,10 @@ defconfig: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel run: kernel initramfs.cpio -- cgit v1.2.3 From 727a92d62fd6a382b4c5972008e45667e707b0e4 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Sat, 11 Nov 2023 18:30:10 -0800 Subject: selftests/bpf: Add assert for user stacks in test_task_stack This is a follow up to: commit b8e3a87a627b ("bpf: Add crosstask check to __bpf_get_stack"). This test ensures that the task iterator only gets a single user stack (for the current task). Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20231112023010.144675-1-linux@jordanrome.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 2 ++ tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4e02093c2cbe..618af9dfae9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -332,6 +332,8 @@ static void test_task_stack(void) do_dummy_read(skel->progs.dump_task_stack); do_dummy_read(skel->progs.get_task_user_stacks); + ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks"); + bpf_iter_task_stack__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index f2b8167b72a8..442f4ca39fd7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx) return 0; } +int num_user_stacks = 0; + SEC("iter/task") int get_task_user_stacks(struct bpf_iter__task *ctx) { @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) if (res <= 0) return 0; + /* Only one task, the current one, should succeed */ + ++num_user_stacks; + buf_sz += res; /* If the verifier doesn't refine bpf_get_task_stack res, and instead -- cgit v1.2.3 From 335869c3f2b881bda6eeeb2df342841a1db3310b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:06 -0700 Subject: KVM: selftests: Drop unused kvm_userspace_memory_region_find() helper Drop kvm_userspace_memory_region_find(), it's unused and a terrible API (probably why it's unused). If anything outside of kvm_util.c needs to get at the memslot, userspace_mem_region_find() can be exposed to give others full access to all memory region/slot information. Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-25-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util_base.h | 4 --- tools/testing/selftests/kvm/lib/kvm_util.c | 29 ---------------------- 2 files changed, 33 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a18db6a7b3cf..967eaaeacd75 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -776,10 +776,6 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) return n; } -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end); - #define sync_global_to_guest(vm, g) ({ \ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ memcpy(_p, &(g), sizeof(g)); \ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7a8af1821f5d..f09295d56c23 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -590,35 +590,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } -/* - * KVM Userspace Memory Region Find - * - * Input Args: - * vm - Virtual Machine - * start - Starting VM physical address - * end - Ending VM physical address, inclusive. - * - * Output Args: None - * - * Return: - * Pointer to overlapping region, NULL if no such region. - * - * Public interface to userspace_mem_region_find. Allows tests to look up - * the memslot datastructure for a given range of guest physical memory. - */ -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end) -{ - struct userspace_mem_region *region; - - region = userspace_mem_region_find(vm, start, end); - if (!region) - return NULL; - - return ®ion->region; -} - __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 8d99e347c097ab3f9fb93d0f88dddf20051d7c88 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:07 -0700 Subject: KVM: selftests: Convert lib's mem regions to KVM_SET_USER_MEMORY_REGION2 Use KVM_SET_USER_MEMORY_REGION2 throughout KVM's selftests library so that support for guest private memory can be added without needing an entirely separate set of helpers. Note, this obviously makes selftests backwards-incompatible with older KVM versions from this point forward. Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-26-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util_base.h | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 967eaaeacd75..9f144841c2ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -44,7 +44,7 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ struct userspace_mem_region { - struct kvm_userspace_memory_region region; + struct kvm_userspace_memory_region2 region; struct sparsebit *unused_phy_pages; int fd; off_t offset; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f09295d56c23..3676b37bea38 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -453,8 +453,9 @@ void kvm_vm_restart(struct kvm_vm *vmp) vm_create_irqchip(vmp); hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { - int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%llx size: 0x%llx", @@ -657,7 +658,7 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, } region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); sparsebit_free(®ion->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); @@ -1014,8 +1015,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->region.guest_phys_addr = guest_paddr; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%lx size: 0x%lx", @@ -1097,9 +1098,9 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) region->region.flags = flags; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i slot: %u flags: 0x%x", ret, errno, slot, flags); } @@ -1127,9 +1128,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) region->region.guest_phys_addr = new_gpa; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n" "ret: %i errno: %i slot: %u new_gpa: 0x%lx", ret, errno, slot, new_gpa); } -- cgit v1.2.3 From bb2968ad6c33c0902dce48ea57d58c5bb4f3c617 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:08 -0700 Subject: KVM: selftests: Add support for creating private memslots Add support for creating "private" memslots via KVM_CREATE_GUEST_MEMFD and KVM_SET_USER_MEMORY_REGION2. Make vm_userspace_mem_region_add() a wrapper to its effective replacement, vm_mem_add(), so that private memslots are fully opt-in, i.e. don't require update all tests that add memory regions. Pivot on the KVM_MEM_PRIVATE flag instead of the validity of the "gmem" file descriptor so that simple tests can let vm_mem_add() do the heavy lifting of creating the guest memfd, but also allow the caller to pass in an explicit fd+offset so that fancier tests can do things like back multiple memslots with a single file. If the caller passes in a fd, dup() the fd so that (a) __vm_mem_region_delete() can close the fd associated with the memory region without needing yet another flag, and (b) so that the caller can safely close its copy of the fd without having to first destroy memslots. Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-27-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util_base.h | 23 +++++++ tools/testing/selftests/kvm/include/test_util.h | 5 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 76 +++++++++++++--------- 3 files changed, 73 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 9f144841c2ee..9f861182c02a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) void vm_create_irqchip(struct kvm_vm *vm); +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7e614adc6cf4..7257f2243ab9 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) +{ + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; +} + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static inline uint64_t align_up(uint64_t x, uint64_t size) { diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 3676b37bea38..b63500fca627 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); close(region->fd); } + if (region->region.guest_memfd >= 0) + close(region->region.guest_memfd); free(region); } @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } -/* - * VM Userspace Memory Region Add - * - * Input Args: - * vm - Virtual Machine - * src_type - Storage source for this region. - * NULL to use anonymous memory. - * guest_paddr - Starting guest physical address - * slot - KVM region slot - * npages - Number of physical pages - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) - * - * Output Args: None - * - * Return: None - * - * Allocates a memory area of the number of pages specified by npages - * and maps it to the VM specified by vm, at a starting physical address - * given by guest_paddr. The region is created with a KVM region slot - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The - * region is created with the flags given by flags. - */ -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags) +/* FIXME: This thing needs to be ripped apart and rewritten. */ +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); + size_t mem_size = npages * vm->page_size; size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* Allocate and initialize new mem region structure. */ region = calloc(1, sizeof(*region)); TEST_ASSERT(region != NULL, "Insufficient Memory"); - region->mmap_size = npages * vm->page_size; + region->mmap_size = mem_size; #ifdef __s390x__ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { - ret = madvise(region->host_mem, npages * vm->page_size, + ret = madvise(region->host_mem, mem_size, src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", - region->host_mem, npages * vm->page_size, + region->host_mem, mem_size, vm_mem_backing_src_alias(src_type)->name); } region->backing_src_type = src_type; + + if (flags & KVM_MEM_GUEST_MEMFD) { + if (guest_memfd < 0) { + uint32_t guest_memfd_flags = 0; + TEST_ASSERT(!guest_memfd_offset, + "Offset must be zero when creating new guest_memfd"); + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); + } else { + /* + * Install a unique fd for each memslot so that the fd + * can be closed when the region is deleted without + * needing to track if the fd is owned by the framework + * or by the caller. + */ + guest_memfd = dup(guest_memfd); + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + } + + region->region.guest_memfd = guest_memfd; + region->region.guest_memfd_offset = guest_memfd_offset; + } else { + region->region.guest_memfd = -1; + } + region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n", ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size); + guest_paddr, (uint64_t) region->region.memory_size, + region->region.guest_memfd); /* Add to quick lookup data structures */ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, } } +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, + uint64_t npages, uint32_t flags) +{ + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); +} + /* * Memslot to region * -- cgit v1.2.3 From f7fa67495d118f734f98b406fd46888616b4a3c3 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 27 Oct 2023 11:22:09 -0700 Subject: KVM: selftests: Add helpers to convert guest memory b/w private and shared Add helpers to convert memory between private and shared via KVM's memory attributes, as well as helpers to free/allocate guest_memfd memory via fallocate(). Userspace, i.e. tests, is NOT required to do fallocate() when converting memory, as the attributes are the single source of truth. Provide allocate() helpers so that tests can mimic a userspace that frees private memory on conversion, e.g. to prioritize memory usage over performance. Signed-off-by: Vishal Annapurve Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-28-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba --- .../testing/selftests/kvm/include/kvm_util_base.h | 48 ++++++++++++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 28 +++++++++++++ 2 files changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 9f861182c02a..1441fca6c273 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -333,6 +333,54 @@ static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } +static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, + uint64_t size, uint64_t attributes) +{ + struct kvm_memory_attributes attr = { + .attributes = attributes, + .address = gpa, + .size = size, + .flags = 0, + }; + + /* + * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows + * need significant enhancements to support multiple attributes. + */ + TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, + "Update me to support multiple attributes!"); + + vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); +} + + +static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, 0); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, + bool punch_hole); + +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, true); +} + +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, false); +} + void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); const char *vm_guest_mode_string(uint32_t i); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b63500fca627..cc6007abc4b1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1167,6 +1167,34 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, memslot2region(vm, slot), true); } +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, + bool punch_hole) +{ + const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); + struct userspace_mem_region *region; + uint64_t end = base + size; + uint64_t gpa, len; + off_t fd_offset; + int ret; + + for (gpa = base; gpa < end; gpa += len) { + uint64_t offset; + + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, + "Private memory region not found for GPA 0x%lx", gpa); + + offset = gpa - region->region.guest_phys_addr; + fd_offset = region->region.guest_memfd_offset + offset; + len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + + ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); + TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx\n", + punch_hole ? "punch hole" : "allocate", gpa, len, + region->region.guest_memfd, mode, fd_offset); + } +} + /* Returns the size of a vCPU's kvm_run structure. */ static int vcpu_mmap_sz(void) { -- cgit v1.2.3 From 01244fce2fa22176e46609c051f6fe15cf81e188 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 27 Oct 2023 11:22:10 -0700 Subject: KVM: selftests: Add helpers to do KVM_HC_MAP_GPA_RANGE hypercalls (x86) Add helpers for x86 guests to invoke the KVM_HC_MAP_GPA_RANGE hypercall, which KVM will forward to userspace and thus can be used by tests to coordinate private<=>shared conversions between host userspace code and guest code. Signed-off-by: Vishal Annapurve [sean: drop shared/private helpers (let tests specify flags)] Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-29-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/x86_64/processor.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 25bc61dac5fb..a84863503fcb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -15,6 +15,7 @@ #include #include +#include #include #include "../kvm_util.h" @@ -1194,6 +1195,20 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, + uint64_t size, uint64_t flags) +{ + return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); +} + +static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, + uint64_t flags) +{ + uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + + GUEST_ASSERT(!ret); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ -- cgit v1.2.3 From 672eaa351015d8165c41fff644ee7d2b369cea12 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:11 -0700 Subject: KVM: selftests: Introduce VM "shape" to allow tests to specify the VM type Add a "vm_shape" structure to encapsulate the selftests-defined "mode", along with the KVM-defined "type" for use when creating a new VM. "mode" tracks physical and virtual address properties, as well as the preferred backing memory type, while "type" corresponds to the VM type. Taking the VM type will allow adding tests for KVM_CREATE_GUEST_MEMFD without needing an entirely separate set of helpers. At this time, guest_memfd is effectively usable only by confidential VM types in the form of guest private memory, and it's expected that x86 will double down and require unique VM types for TDX and SNP guests. Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-30-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/aarch64/page_fault_test.c | 2 +- tools/testing/selftests/kvm/dirty_log_test.c | 2 +- .../testing/selftests/kvm/include/kvm_util_base.h | 54 ++++++++++++++++++---- tools/testing/selftests/kvm/kvm_page_table_test.c | 2 +- tools/testing/selftests/kvm/lib/kvm_util.c | 43 ++++++++--------- tools/testing/selftests/kvm/lib/memstress.c | 3 +- .../selftests/kvm/x86_64/ucna_injection_test.c | 2 +- 7 files changed, 73 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index eb4217b7c768..08a5ca5bed56 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -705,7 +705,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) print_test_banner(mode, p); - vm = ____vm_create(mode); + vm = ____vm_create(VM_SHAPE(mode)); setup_memslots(vm, p); kvm_vm_elf_load(vm, program_invocation_name); setup_ucall(vm); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 936f3a8d1b83..6cbecf499767 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -699,7 +699,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = __vm_create(mode, 1, extra_mem_pages); + vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages); log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 1441fca6c273..157508c071f3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -188,6 +188,23 @@ enum vm_guest_mode { NUM_VM_MODES, }; +struct vm_shape { + enum vm_guest_mode mode; + unsigned int type; +}; + +#define VM_TYPE_DEFAULT 0 + +#define VM_SHAPE(__mode) \ +({ \ + struct vm_shape shape = { \ + .mode = (__mode), \ + .type = VM_TYPE_DEFAULT \ + }; \ + \ + shape; \ +}) + #if defined(__aarch64__) extern enum vm_guest_mode vm_mode_default; @@ -220,6 +237,8 @@ extern enum vm_guest_mode vm_mode_default; #endif +#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) + #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) @@ -784,21 +803,21 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode); -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *____vm_create(struct vm_shape shape); +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT); + return ____vm_create(VM_SHAPE_DEFAULT); } static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) { - return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); + return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); } -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); @@ -806,17 +825,27 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, void *guest_code, struct kvm_vcpu *vcpus[]) { - return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0, + return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, guest_code, vcpus); } + +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); + /* * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages * additional pages of guest memory. Returns the VM and vCPU (via out param). */ -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code); +static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, + extra_mem_pages, guest_code); +} static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code) @@ -824,6 +853,13 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, return __vm_create_with_one_vcpu(vcpu, 0, guest_code); } +static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); +} + struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_pin_this_task_to_pcpu(uint32_t pcpu); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 69f26d80c821..e37dc9c21888 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -254,7 +254,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) /* Create a VM with enough guest pages */ guest_num_pages = test_mem_size / guest_page_size; - vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages, guest_code, test_args.vcpus); /* Align down GPA of the testing memslot */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index cc6007abc4b1..bf15635eda11 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -209,7 +209,7 @@ __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); } -struct kvm_vm *____vm_create(enum vm_guest_mode mode) +struct kvm_vm *____vm_create(struct vm_shape shape) { struct kvm_vm *vm; @@ -221,13 +221,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->regions.hva_tree = RB_ROOT; hash_init(vm->regions.slot_hash); - vm->mode = mode; - vm->type = 0; + vm->mode = shape.mode; + vm->type = shape.type; - vm->pa_bits = vm_guest_mode_params[mode].pa_bits; - vm->va_bits = vm_guest_mode_params[mode].va_bits; - vm->page_size = vm_guest_mode_params[mode].page_size; - vm->page_shift = vm_guest_mode_params[mode].page_shift; + vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; + vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; + vm->page_size = vm_guest_mode_params[vm->mode].page_size; + vm->page_shift = vm_guest_mode_params[vm->mode].page_shift; /* Setup mode specific traits. */ switch (vm->mode) { @@ -265,7 +265,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) /* * Ignore KVM support for 5-level paging (vm->va_bits == 57), * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this VM_MODE. + * isn't for this mode (48-bit virtual address space). */ TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, "Linear address width (%d bits) not supported", @@ -285,10 +285,11 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->pgtable_levels = 5; break; default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); + TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); } #ifdef __aarch64__ + TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types"); if (vm->pa_bits != 40) vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); #endif @@ -347,19 +348,19 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, + uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; int i; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, + vm_guest_mode_string(shape.mode), shape.type, nr_pages); - vm = ____vm_create(mode); + vm = ____vm_create(shape); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); for (i = 0; i < NR_MEM_REGIONS; i++) @@ -400,7 +401,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -409,7 +410,7 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); - vm = __vm_create(mode, nr_vcpus, extra_mem_pages); + vm = __vm_create(shape, nr_vcpus, extra_mem_pages); for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); @@ -417,15 +418,15 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus return vm; } -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code) +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) { struct kvm_vcpu *vcpus[1]; struct kvm_vm *vm; - vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, - guest_code, vcpus); + vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus); *vcpu = vcpus[0]; return vm; diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index df457452d146..d05487e5a371 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -168,7 +168,8 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, * The memory is also added to memslot 0, but that's a benign side * effect as KVM allows aliasing HVAs in meslots. */ - vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, + slot0_pages + guest_num_pages, memstress_guest_code, vcpus); args->vm = vm; diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index 85f34ca7e49e..0ed32ec903d0 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -271,7 +271,7 @@ int main(int argc, char *argv[]) kvm_check_cap(KVM_CAP_MCE); - vm = __vm_create(VM_MODE_DEFAULT, 3, 0); + vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0); kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &supported_mcg_caps); -- cgit v1.2.3 From 242331dfc4959f44e706c9b09b768f312aa5e117 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 27 Oct 2023 11:22:12 -0700 Subject: KVM: selftests: Add GUEST_SYNC[1-6] macros for synchronizing more data Add GUEST_SYNC[1-6]() so that tests can pass the maximum amount of information supported via ucall(), without needing to resort to shared memory. Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-31-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/ucall_common.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ce33d306c2cb..0fb472a5a058 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -52,6 +52,17 @@ int ucall_nr_pages_required(uint64_t page_size); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0) +#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1) +#define GUEST_SYNC3(arg0, arg1, arg2) \ + ucall(UCALL_SYNC, 3, arg0, arg1, arg2) +#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \ + ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3) +#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \ + ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4) +#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \ + ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5) + #define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args) #define GUEST_DONE() ucall(UCALL_DONE, 0) -- cgit v1.2.3 From 43f623f350ce1c46c53b6b77f4dbe741af8c44f3 Mon Sep 17 00:00:00 2001 From: Vishal Annapurve Date: Fri, 27 Oct 2023 11:22:13 -0700 Subject: KVM: selftests: Add x86-only selftest for private memory conversions Add a selftest to exercise implicit/explicit conversion functionality within KVM and verify: - Shared memory is visible to host userspace - Private memory is not visible to host userspace - Host userspace and guest can communicate over shared memory - Data in shared backing is preserved across conversions (test's host userspace doesn't free the data) - Private memory is bound to the lifetime of the VM Ideally, KVM's selftests infrastructure would be reworked to allow backing a single region of guest memory with multiple memslots for _all_ backing types and shapes, i.e. ideally the code for using a single backing fd across multiple memslots would work for "regular" memory as well. But sadly, support for KVM_CREATE_GUEST_MEMFD has languished for far too long, and overhauling selftests' memslots infrastructure would likely open a can of worms, i.e. delay things even further. In addition to the more obvious tests, verify that PUNCH_HOLE actually frees memory. Directly verifying that KVM frees memory is impractical, if it's even possible, so instead indirectly verify memory is freed by asserting that the guest reads zeroes after a PUNCH_HOLE. E.g. if KVM zaps SPTEs but doesn't actually punch a hole in the inode, the subsequent read will still see the previous value. And obviously punching a hole shouldn't cause explosions. Let the user specify the number of memslots in the private mem conversion test, i.e. don't require the number of memslots to be '1' or "nr_vcpus". Creating more memslots than vCPUs is particularly interesting, e.g. it can result in a single KVM_SET_MEMORY_ATTRIBUTES spanning multiple memslots. To keep the math reasonable, align each vCPU's chunk to at least 2MiB (the size is 2MiB+4KiB), and require the total size to be cleanly divisible by the number of memslots. The goal is to be able to validate that KVM plays nice with multiple memslots, being able to create a truly arbitrary number of memslots doesn't add meaningful value, i.e. isn't worth the cost. Intentionally don't take a requirement on KVM_CAP_GUEST_MEMFD, KVM_CAP_MEMORY_FAULT_INFO, KVM_MEMORY_ATTRIBUTE_PRIVATE, etc., as it's a KVM bug to advertise KVM_X86_SW_PROTECTED_VM without its prerequisites. Signed-off-by: Vishal Annapurve Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-32-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/private_mem_conversions_test.c | 482 +++++++++++++++++++++ 2 files changed, 483 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a5963ab9215b..ecdea5e7afa8 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -91,6 +91,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c new file mode 100644 index 000000000000..4d6a37a5d896 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BASE_DATA_SLOT 10 +#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) + +/* Horrific macro so that the line info is captured accurately :-( */ +#define memcmp_g(gpa, pattern, size) \ +do { \ + uint8_t *mem = (uint8_t *)gpa; \ + size_t i; \ + \ + for (i = 0; i < size; i++) \ + __GUEST_ASSERT(mem[i] == pattern, \ + "Guest expected 0x%x at offset %lu (gpa 0x%llx), got 0x%x", \ + pattern, i, gpa + i, mem[i]); \ +} while (0) + +static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + TEST_ASSERT(mem[i] == pattern, + "Host expected 0x%x at gpa 0x%lx, got 0x%x", + pattern, gpa + i, mem[i]); +} + +/* + * Run memory conversion tests with explicit conversion: + * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit + * to back/unback private memory. Subsequent accesses by guest to the gpa range + * will not cause exit to userspace. + * + * Test memory conversion scenarios with following steps: + * 1) Access private memory using private access and verify that memory contents + * are not visible to userspace. + * 2) Convert memory to shared using explicit conversions and ensure that + * userspace is able to access the shared regions. + * 3) Convert memory back to private using explicit conversions and ensure that + * userspace is again not able to access converted private regions. + */ + +#define GUEST_STAGE(o, s) { .offset = o, .size = s } + +enum ucall_syncs { + SYNC_SHARED, + SYNC_PRIVATE, +}; + +static void guest_sync_shared(uint64_t gpa, uint64_t size, + uint8_t current_pattern, uint8_t new_pattern) +{ + GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); +} + +static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +{ + GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); +} + +/* Arbitrary values, KVM doesn't care about the attribute flags. */ +#define MAP_GPA_SET_ATTRIBUTES BIT(0) +#define MAP_GPA_SHARED BIT(1) +#define MAP_GPA_DO_FALLOCATE BIT(2) + +static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, + bool do_fallocate) +{ + uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + + if (map_shared) + flags |= MAP_GPA_SHARED; + if (do_fallocate) + flags |= MAP_GPA_DO_FALLOCATE; + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, true, do_fallocate); +} + +static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, false, do_fallocate); +} + +struct { + uint64_t offset; + uint64_t size; +} static const test_ranges[] = { + GUEST_STAGE(0, PAGE_SIZE), + GUEST_STAGE(0, SZ_2M), + GUEST_STAGE(PAGE_SIZE, PAGE_SIZE), + GUEST_STAGE(PAGE_SIZE, SZ_2M), + GUEST_STAGE(SZ_2M, PAGE_SIZE), +}; + +static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +{ + const uint8_t def_p = 0xaa; + const uint8_t init_p = 0xcc; + uint64_t j; + int i; + + /* Memory should be shared by default. */ + memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE); + guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p); + + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + uint8_t p1 = 0x11; + uint8_t p2 = 0x22; + uint8_t p3 = 0x33; + uint8_t p4 = 0x44; + + /* + * Set the test region to pattern one to differentiate it from + * the data range as a whole (contains the initial pattern). + */ + memset((void *)gpa, p1, size); + + /* + * Convert to private, set and verify the private data, and + * then verify that the rest of the data (map shared) still + * holds the initial pattern, and that the host always sees the + * shared memory (initial pattern). Unlike shared memory, + * punching a hole in private memory is destructive, i.e. + * previous values aren't guaranteed to be preserved. + */ + guest_map_private(gpa, size, do_fallocate); + + if (size > PAGE_SIZE) { + memset((void *)gpa, p2, PAGE_SIZE); + goto skip; + } + + memset((void *)gpa, p2, size); + guest_sync_private(gpa, size, p1); + + /* + * Verify that the private memory was set to pattern two, and + * that shared memory still holds the initial pattern. + */ + memcmp_g(gpa, p2, size); + if (gpa > base_gpa) + memcmp_g(base_gpa, init_p, gpa - base_gpa); + if (gpa + size < base_gpa + PER_CPU_DATA_SIZE) + memcmp_g(gpa + size, init_p, + (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size)); + + /* + * Convert odd-number page frames back to shared to verify KVM + * also correctly handles holes in private ranges. + */ + for (j = 0; j < size; j += PAGE_SIZE) { + if ((j >> PAGE_SHIFT) & 1) { + guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate); + guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3); + + memcmp_g(gpa + j, p3, PAGE_SIZE); + } else { + guest_sync_private(gpa + j, PAGE_SIZE, p1); + } + } + +skip: + /* + * Convert the entire region back to shared, explicitly write + * pattern three to fill in the even-number frames before + * asking the host to verify (and write pattern four). + */ + guest_map_shared(gpa, size, do_fallocate); + memset((void *)gpa, p3, size); + guest_sync_shared(gpa, size, p3, p4); + memcmp_g(gpa, p4, size); + + /* Reset the shared memory back to the initial pattern. */ + memset((void *)gpa, init_p, size); + + /* + * Free (via PUNCH_HOLE) *all* private memory so that the next + * iteration starts from a clean slate, e.g. with respect to + * whether or not there are pages/folios in guest_mem. + */ + guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true); + } +} + +static void guest_punch_hole(uint64_t gpa, uint64_t size) +{ + /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ + uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +/* + * Test that PUNCH_HOLE actually frees memory by punching holes without doing a + * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating + * (subsequent fault) should zero memory. + */ +static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +{ + const uint8_t init_p = 0xcc; + int i; + + /* + * Convert the entire range to private, this testcase is all about + * punching holes in guest_memfd, i.e. shared mappings aren't needed. + */ + guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + + /* + * Free all memory before each iteration, even for the !precise + * case where the memory will be faulted back in. Freeing and + * reallocating should obviously work, and freeing all memory + * minimizes the probability of cross-testcase influence. + */ + guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE); + + /* Fault-in and initialize memory, and verify the pattern. */ + if (precise) { + memset((void *)gpa, init_p, size); + memcmp_g(gpa, init_p, size); + } else { + memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + } + + /* + * Punch a hole at the target range and verify that reads from + * the guest succeed and return zeroes. + */ + guest_punch_hole(gpa, size); + memcmp_g(gpa, 0, size); + } +} + +static void guest_code(uint64_t base_gpa) +{ + /* + * Run the conversion test twice, with and without doing fallocate() on + * the guest_memfd backing when converting between shared and private. + */ + guest_test_explicit_conversion(base_gpa, false); + guest_test_explicit_conversion(base_gpa, true); + + /* + * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd + * faulted in, once with only the target range faulted in. + */ + guest_test_punch_hole(base_gpa, false); + guest_test_punch_hole(base_gpa, true); + GUEST_DONE(); +} + +static void handle_exit_hypercall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint64_t gpa = run->hypercall.args[0]; + uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; + bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; + bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; + struct kvm_vm *vm = vcpu->vm; + + TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE, + "Wanted MAP_GPA_RANGE (%u), got '%llu'", + KVM_HC_MAP_GPA_RANGE, run->hypercall.nr); + + if (do_fallocate) + vm_guest_mem_fallocate(vm, gpa, size, map_shared); + + if (set_attributes) + vm_set_memory_attributes(vm, gpa, size, + map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE); + run->hypercall.ret = 0; +} + +static bool run_vcpus; + +static void *__test_mem_conversions(void *__vcpu) +{ + struct kvm_vcpu *vcpu = __vcpu; + struct kvm_run *run = vcpu->run; + struct kvm_vm *vm = vcpu->vm; + struct ucall uc; + + while (!READ_ONCE(run_vcpus)) + ; + + for ( ;; ) { + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_HYPERCALL) { + handle_exit_hypercall(vcpu); + continue; + } + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: { + uint64_t gpa = uc.args[1]; + size_t size = uc.args[2]; + size_t i; + + TEST_ASSERT(uc.args[0] == SYNC_SHARED || + uc.args[0] == SYNC_PRIVATE, + "Unknown sync command '%ld'", uc.args[0]); + + for (i = 0; i < size; i += vm->page_size) { + size_t nr_bytes = min_t(size_t, vm->page_size, size - i); + uint8_t *hva = addr_gpa2hva(vm, gpa + i); + + /* In all cases, the host should observe the shared data. */ + memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); + + /* For shared, write the new pattern to guest memory. */ + if (uc.args[0] == SYNC_SHARED) + memset(hva, uc.args[4], nr_bytes); + } + break; + } + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +} + +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, + uint32_t nr_memslots) +{ + /* + * Allocate enough memory so that each vCPU's chunk of memory can be + * naturally aligned with respect to the size of the backing store. + */ + const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type)); + const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment); + const size_t memfd_size = per_cpu_size * nr_vcpus; + const size_t slot_size = memfd_size / nr_memslots; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + pthread_t threads[KVM_MAX_VCPUS]; + struct kvm_vm *vm; + int memfd, i, r; + + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + TEST_ASSERT(slot_size * nr_memslots == memfd_size, + "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)", + memfd_size, nr_memslots); + vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus); + + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE)); + + memfd = vm_create_guest_memfd(vm, memfd_size, 0); + + for (i = 0; i < nr_memslots; i++) + vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i, + BASE_DATA_SLOT + i, slot_size / vm->page_size, + KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); + + for (i = 0; i < nr_vcpus; i++) { + uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + + vcpu_args_set(vcpus[i], 1, gpa); + + /* + * Map only what is needed so that an out-of-bounds access + * results #PF => SHUTDOWN instead of data corruption. + */ + virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size); + + pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]); + } + + WRITE_ONCE(run_vcpus, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + kvm_vm_free(vm); + + /* + * Allocate and free memory from the guest_memfd after closing the VM + * fd. The guest_memfd is gifted a reference to its owning VM, i.e. + * should prevent the VM from being fully destroyed until the last + * reference to the guest_memfd is also put. + */ + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); +} + +static void usage(const char *cmd) +{ + puts(""); + printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd); + puts(""); + backing_src_help("-s"); + puts(""); + puts(" -n: specify the number of vcpus (default: 1)"); + puts(""); + puts(" -m: specify the number of memslots (default: 1)"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; + uint32_t nr_memslots = 1; + uint32_t nr_vcpus = 1; + int opt; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) { + switch (opt) { + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'n': + nr_vcpus = atoi_positive("nr_vcpus", optarg); + break; + case 'm': + nr_memslots = atoi_positive("nr_memslots", optarg); + break; + case 'h': + default: + usage(argv[0]); + exit(0); + } + } + + test_mem_conversions(src_type, nr_vcpus, nr_memslots); + + return 0; +} -- cgit v1.2.3 From e6f4f345b259cad178bad7e40a9d4b65cf195067 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:22:14 -0700 Subject: KVM: selftests: Add KVM_SET_USER_MEMORY_REGION2 helper Add helpers to invoke KVM_SET_USER_MEMORY_REGION2 directly so that tests can validate of features that are unique to "version 2" of "set user memory region", e.g. do negative testing on gmem_fd and gmem_offset. Provide a raw version as well as an assert-success version to reduce the amount of boilerplate code need for basic usage. Signed-off-by: Chao Peng Signed-off-by: Ackerley Tng Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-33-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util_base.h | 7 ++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 29 ++++++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 157508c071f3..8ec122f5fcc8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -522,6 +522,13 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); + void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index bf15635eda11..9b29cbf49476 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -873,6 +873,35 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + struct kvm_userspace_memory_region2 region = { + .slot = slot, + .flags = flags, + .guest_phys_addr = gpa, + .memory_size = size, + .userspace_addr = (uintptr_t)hva, + .guest_memfd = guest_memfd, + .guest_memfd_offset = guest_memfd_offset, + }; + + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); +} + +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, + guest_memfd, guest_memfd_offset); + + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)", + errno, strerror(errno)); +} + + /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, -- cgit v1.2.3 From 2feabb855df8f0889146c2e951307ba477d1f37b Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:22:15 -0700 Subject: KVM: selftests: Expand set_memory_region_test to validate guest_memfd() Expand set_memory_region_test to exercise various positive and negative testcases for private memory. - Non-guest_memfd() file descriptor for private memory - guest_memfd() from different VM - Overlapping bindings - Unaligned bindings Signed-off-by: Chao Peng Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng [sean: trim the testcases to remove duplicate coverage] Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-34-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/include/kvm_util_base.h | 12 +++ .../testing/selftests/kvm/set_memory_region_test.c | 104 ++++++++++++++++++++- 2 files changed, 114 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 8ec122f5fcc8..1b58f943562f 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -819,6 +819,18 @@ static inline struct kvm_vm *vm_create_barebones(void) return ____vm_create(VM_SHAPE_DEFAULT); } +#ifdef __x86_64__ +static inline struct kvm_vm *vm_create_barebones_protected_vm(void) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + return ____vm_create(shape); +} +#endif + static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) { return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index b32960189f5f..a78394faee7c 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -385,13 +385,105 @@ static void test_add_max_memory_regions(void) kvm_vm_free(vm); } + +#ifdef __x86_64__ +static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd, + size_t offset, const char *msg) +{ + int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, + 0, memfd, offset); + TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg); +} + +static void test_add_private_memory_region(void) +{ + struct kvm_vm *vm, *vm2; + int memfd, i; + + pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_protected_vm(); + + test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); + test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); + + memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false); + test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); + close(memfd); + + vm2 = vm_create_barebones_protected_vm(); + memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); + test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); + + vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + kvm_vm_free(vm2); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); + for (i = 1; i < PAGE_SIZE; i++) + test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail"); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + + kvm_vm_free(vm); +} + +static void test_add_overlapping_private_memory_regions(void) +{ + struct kvm_vm *vm; + int memfd; + int r; + + pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_protected_vm(); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2, + 0, memfd, MEM_REGION_SIZE * 2); + + /* + * Delete the first memslot, and then attempt to recreate it except + * with a "bad" offset that results in overlap in the guest_memfd(). + */ + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, 0, NULL, -1, 0); + + /* Overlap the front half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 - MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + /* And now the back half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 + MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + close(memfd); + kvm_vm_free(vm); +} +#endif + int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; -#endif -#ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. @@ -402,6 +494,14 @@ int main(int argc, char *argv[]) test_add_max_memory_regions(); #ifdef __x86_64__ + if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) && + (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) { + test_add_private_memory_region(); + test_add_overlapping_private_memory_regions(); + } else { + pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n"); + } + if (argc > 1) loops = atoi_positive("Number of iterations", argv[1]); else -- cgit v1.2.3 From 8a89efd43423cb3005c5e641e846184e292c1465 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Fri, 27 Oct 2023 11:22:16 -0700 Subject: KVM: selftests: Add basic selftest for guest_memfd() Add a selftest to verify the basic functionality of guest_memfd(): + file descriptor created with the guest_memfd() ioctl does not allow read/write/mmap operations + file size and block size as returned from fstat are as expected + fallocate on the fd checks that offset/length on fallocate(FALLOC_FL_PUNCH_HOLE) should be page aligned + invalid inputs (misaligned size, invalid flags) are rejected + file size and inode are unique (the innocuous-sounding anon_inode_getfile() backs all files with a single inode...) Signed-off-by: Chao Peng Co-developed-by: Ackerley Tng Signed-off-by: Ackerley Tng Co-developed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-35-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + tools/testing/selftests/kvm/guest_memfd_test.c | 207 +++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 tools/testing/selftests/kvm/guest_memfd_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ecdea5e7afa8..fd3b30a4ca7b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -134,6 +134,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += guest_memfd_test TEST_GEN_PROGS_x86_64 += guest_print_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c new file mode 100644 index 000000000000..fd389663c49b --- /dev/null +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright Intel Corporation, 2023 + * + * Author: Chao Peng + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "test_util.h" +#include "kvm_util_base.h" + +static void test_file_read_write(int fd) +{ + char buf[64]; + + TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0, + "read on a guest_mem fd should fail"); + TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0, + "write on a guest_mem fd should fail"); + TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0, + "pread on a guest_mem fd should fail"); + TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0, + "pwrite on a guest_mem fd should fail"); +} + +static void test_mmap(int fd, size_t page_size) +{ + char *mem; + + mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); +} + +static void test_file_size(int fd, size_t page_size, size_t total_size) +{ + struct stat sb; + int ret; + + ret = fstat(fd, &sb); + TEST_ASSERT(!ret, "fstat should succeed"); + TEST_ASSERT_EQ(sb.st_size, total_size); + TEST_ASSERT_EQ(sb.st_blksize, page_size); +} + +static void test_fallocate(int fd, size_t page_size, size_t total_size) +{ + int ret; + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size); + TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size - 1, page_size); + TEST_ASSERT(ret, "fallocate with unaligned offset should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size); + TEST_ASSERT(ret, "fallocate beginning at total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size); + TEST_ASSERT(ret, "fallocate beginning after total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size - 1); + TEST_ASSERT(ret, "fallocate with unaligned size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size); + TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); +} + +static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +{ + struct { + off_t offset; + off_t len; + } testcases[] = { + {0, 1}, + {0, page_size - 1}, + {0, page_size + 1}, + + {1, 1}, + {1, page_size - 1}, + {1, page_size}, + {1, page_size + 1}, + + {page_size, 1}, + {page_size, page_size - 1}, + {page_size, page_size + 1}, + }; + int ret, i; + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + testcases[i].offset, testcases[i].len); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail", + testcases[i].offset, testcases[i].len); + } +} + +static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +{ + size_t page_size = getpagesize(); + uint64_t flag; + size_t size; + int fd; + + for (size = 1; size < page_size; size++) { + fd = __vm_create_guest_memfd(vm, size, 0); + TEST_ASSERT(fd == -1 && errno == EINVAL, + "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", + size); + } + + for (flag = 1; flag; flag <<= 1) { + uint64_t bit; + + fd = __vm_create_guest_memfd(vm, page_size, flag); + TEST_ASSERT(fd == -1 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + + for_each_set_bit(bit, &valid_flags, 64) { + fd = __vm_create_guest_memfd(vm, page_size, flag | BIT_ULL(bit)); + TEST_ASSERT(fd == -1 && errno == EINVAL, + "guest_memfd() with flags '0x%llx' should fail with EINVAL", + flag | BIT_ULL(bit)); + } + } +} + +static void test_create_guest_memfd_multiple(struct kvm_vm *vm) +{ + int fd1, fd2, ret; + struct stat st1, st2; + + fd1 = __vm_create_guest_memfd(vm, 4096, 0); + TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); + + fd2 = __vm_create_guest_memfd(vm, 8192, 0); + TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); + + ret = fstat(fd2, &st2); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); +} + +int main(int argc, char *argv[]) +{ + size_t page_size; + size_t total_size; + int fd; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + page_size = getpagesize(); + total_size = page_size * 4; + + vm = vm_create_barebones(); + + test_create_guest_memfd_invalid(vm); + test_create_guest_memfd_multiple(vm); + + fd = vm_create_guest_memfd(vm, total_size, 0); + + test_file_read_write(fd); + test_mmap(fd, page_size); + test_file_size(fd, page_size, total_size); + test_fallocate(fd, page_size, total_size); + test_invalid_punch_hole(fd, page_size, total_size); + + close(fd); +} -- cgit v1.2.3 From e3577788de64139202d89224fe31613c0f02b790 Mon Sep 17 00:00:00 2001 From: Ackerley Tng Date: Fri, 27 Oct 2023 11:22:17 -0700 Subject: KVM: selftests: Test KVM exit behavior for private memory/access "Testing private access when memslot gets deleted" tests the behavior of KVM when a private memslot gets deleted while the VM is using the private memslot. When KVM looks up the deleted (slot = NULL) memslot, KVM should exit to userspace with KVM_EXIT_MEMORY_FAULT. In the second test, upon a private access to non-private memslot, KVM should also exit to userspace with KVM_EXIT_MEMORY_FAULT. Intentionally don't take a requirement on KVM_CAP_GUEST_MEMFD, KVM_CAP_MEMORY_FAULT_INFO, KVM_MEMORY_ATTRIBUTE_PRIVATE, etc., as it's a KVM bug to advertise KVM_X86_SW_PROTECTED_VM without its prerequisites. Signed-off-by: Ackerley Tng [sean: call out the similarities with set_memory_region_test] Signed-off-by: Sean Christopherson Message-Id: <20231027182217.3615211-36-seanjc@google.com> Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/private_mem_kvm_exits_test.c | 120 +++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index fd3b30a4ca7b..69ce8e06b3a3 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -92,6 +92,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c new file mode 100644 index 000000000000..13e72fcec8dd --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Google LLC. + */ +#include +#include +#include + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +/* Arbitrarily selected to avoid overlaps with anything else */ +#define EXITS_TEST_GVA 0xc0000000 +#define EXITS_TEST_GPA EXITS_TEST_GVA +#define EXITS_TEST_NPAGES 1 +#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) +#define EXITS_TEST_SLOT 10 + +static uint64_t guest_repeatedly_read(void) +{ + volatile uint64_t value; + + while (true) + value = *((uint64_t *) EXITS_TEST_GVA); + + return value; +} + +static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +{ + int r; + + r = _vcpu_run(vcpu); + if (r) { + TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r)); + TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT); + } + return vcpu->run->exit_reason; +} + +const struct vm_shape protected_vm_shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, +}; + +static void test_private_access_memslot_deleted(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + pthread_t vm_thread; + void *thread_return; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, + KVM_MEM_GUEST_MEMFD); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + pthread_create(&vm_thread, NULL, + (void *(*)(void *))run_vcpu_get_exit_reason, + (void *)vcpu); + + vm_mem_region_delete(vm, EXITS_TEST_SLOT); + + pthread_join(vm_thread, &thread_return); + exit_reason = (uint32_t)(uint64_t)thread_return; + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +static void test_private_access_memslot_not_private(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + /* Add a non-private memslot (flags = 0) */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, 0); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + exit_reason = run_vcpu_get_exit_reason(vcpu); + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + test_private_access_memslot_deleted(); + test_private_access_memslot_not_private(); +} -- cgit v1.2.3 From 5d74316466f4aabdd2ee1e33b45e4933c9bc3ea1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 30 Oct 2023 17:20:49 -0700 Subject: KVM: selftests: Add a memory region subtest to validate invalid flags Add a subtest to set_memory_region_test to verify that KVM rejects invalid flags and combinations with -EINVAL. KVM might or might not fail with EINVAL anyways, but we can at least try. Signed-off-by: Sean Christopherson Message-Id: <20231031002049.3915752-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/guest_memfd_test.c | 9 +--- .../testing/selftests/kvm/set_memory_region_test.c | 49 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index fd389663c49b..318ba6ba8bd3 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -136,20 +136,13 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm) size); } - for (flag = 1; flag; flag <<= 1) { + for (flag = 0; flag; flag <<= 1) { uint64_t bit; fd = __vm_create_guest_memfd(vm, page_size, flag); TEST_ASSERT(fd == -1 && errno == EINVAL, "guest_memfd() with flag '0x%lx' should fail with EINVAL", flag); - - for_each_set_bit(bit, &valid_flags, 64) { - fd = __vm_create_guest_memfd(vm, page_size, flag | BIT_ULL(bit)); - TEST_ASSERT(fd == -1 && errno == EINVAL, - "guest_memfd() with flags '0x%llx' should fail with EINVAL", - flag | BIT_ULL(bit)); - } } } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a78394faee7c..1efee1cfcff0 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -326,6 +326,53 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ +static void test_invalid_memory_region_flags(void) +{ + uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES; + const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD; + struct kvm_vm *vm; + int r, i; + +#ifdef __x86_64__ + supported_flags |= KVM_MEM_READONLY; + + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + vm = vm_create_barebones_protected_vm(); + else +#endif + vm = vm_create_barebones(); + + if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE) + supported_flags |= KVM_MEM_GUEST_MEMFD; + + for (i = 0; i < 32; i++) { + if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i))) + continue; + + r = __vm_set_user_memory_region(vm, MEM_REGION_SLOT, BIT(i), + MEM_REGION_GPA, MEM_REGION_SIZE, NULL); + + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i)); + + if (supported_flags & BIT(i)) + continue; + + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, BIT(i), + MEM_REGION_GPA, MEM_REGION_SIZE, NULL, 0, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i)); + } + + if (supported_flags & KVM_MEM_GUEST_MEMFD) { + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, + KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, NULL, 0, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported"); + } +} + /* * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any * tentative to add further slots should fail. @@ -491,6 +538,8 @@ int main(int argc, char *argv[]) test_zero_memory_regions(); #endif + test_invalid_memory_region_flags(); + test_add_max_memory_regions(); #ifdef __x86_64__ -- cgit v1.2.3 From 4849775587844e44d215289c425bcd70f315efe7 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:30 +0000 Subject: selftests/bpf: Fix issues in setup_classid_environment() If the net_cls subsystem is already mounted, attempting to mount it again in setup_classid_environment() will result in a failure with the error code EBUSY. Despite this, tmpfs will have been successfully mounted at /sys/fs/cgroup/net_cls. Consequently, the /sys/fs/cgroup/net_cls directory will be empty, causing subsequent setup operations to fail. Here's an error log excerpt illustrating the issue when net_cls has already been mounted at /sys/fs/cgroup/net_cls prior to running setup_classid_environment(): - Before that change $ tools/testing/selftests/bpf/test_progs --name=cgroup_v1v2 test_cgroup_v1v2:PASS:server_fd 0 nsec test_cgroup_v1v2:PASS:client_fd 0 nsec test_cgroup_v1v2:PASS:cgroup_fd 0 nsec test_cgroup_v1v2:PASS:server_fd 0 nsec run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec test_cgroup_v1v2:PASS:cgroup-v2-only 0 nsec (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup.procs (cgroup_helpers.c:540: errno: No such file or directory) Opening cgroup classid: /sys/fs/cgroup/net_cls/cgroup-test-work-dir/net_cls.classid run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup-test-work-dir/cgroup.procs run_test:FAIL:join_classid unexpected error: 1 (errno 2) test_cgroup_v1v2:FAIL:cgroup-v1v2 unexpected error: -1 (errno 2) (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup.procs #44 cgroup_v1v2:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED - After that change $ tools/testing/selftests/bpf/test_progs --name=cgroup_v1v2 #44 cgroup_v1v2:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-3-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5b1da2a32ea7..10b5f42e65e7 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -523,10 +523,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); -- cgit v1.2.3 From f744d35ecf46f111bf9b54bfdbc89a28ee8b928a Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:31 +0000 Subject: selftests/bpf: Add parallel support for classid Include the current pid in the classid cgroup path. This way, different testers relying on classid-based configurations will have distinct classid cgroup directories, enabling them to run concurrently. Additionally, we leverage the current pid as the classid, ensuring unique identification. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-4-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 18 +++++++++++------- tools/testing/selftests/bpf/cgroup_helpers.h | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 10b5f42e65e7..f18649a79d64 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -551,15 +554,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. If there * is a failure, it prints the error to stderr. */ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -575,7 +579,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b546..92fc41daf4a4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -29,7 +29,7 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); int setup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914d3..addf720428f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); -- cgit v1.2.3 From c1dcc050aa648bb3b831030d547c3fcc1c68140c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:32 +0000 Subject: selftests/bpf: Add a new cgroup helper get_classid_cgroup_id() Introduce a new helper function to retrieve the cgroup ID from a net_cls cgroup directory. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-5-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 28 ++++++++++++++++++++++------ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f18649a79d64..63bfa72185be 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -422,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -477,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -621,3 +626,14 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 92fc41daf4a4..e71da4ef031b 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -31,6 +31,7 @@ void cleanup_cgroup_environment(void); /* cgroupv1 related */ int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -- cgit v1.2.3 From bf47300b186facc8ae66a0e2aa89073565f82bb3 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:33 +0000 Subject: selftests/bpf: Add a new cgroup helper get_cgroup_hierarchy_id() A new cgroup helper function, get_cgroup1_hierarchy_id(), has been introduced to obtain the ID of a cgroup1 hierarchy based on the provided cgroup name. This cgroup name can be obtained from the /proc/self/cgroup file. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-6-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 52 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 53 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 63bfa72185be..5aa133bf3688 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -637,3 +637,55 @@ unsigned long long get_classid_cgroup_id(void) format_classid_path(cgroup_workdir); return get_cgroup_id_from_path(cgroup_workdir); } + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like + * "net_cls,net_prio". + */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? id : -1; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index e71da4ef031b..ee053641c026 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); -- cgit v1.2.3 From 360769233cc9c921e90ae387d167ea3cd3cbb04c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:34 +0000 Subject: selftests/bpf: Add selftests for cgroup1 hierarchy Add selftests for cgroup1 hierarchy. The result as follows, $ tools/testing/selftests/bpf/test_progs --name=cgroup1_hierarchy #36/1 cgroup1_hierarchy/test_cgroup1_hierarchy:OK #36/2 cgroup1_hierarchy/test_root_cgid:OK #36/3 cgroup1_hierarchy/test_invalid_level:OK #36/4 cgroup1_hierarchy/test_invalid_cgid:OK #36/5 cgroup1_hierarchy/test_invalid_hid:OK #36/6 cgroup1_hierarchy/test_invalid_cgrp_name:OK #36/7 cgroup1_hierarchy/test_invalid_cgrp_name2:OK #36/8 cgroup1_hierarchy/test_sleepable_prog:OK #36 cgroup1_hierarchy:OK Summary: 1/8 PASSED, 0 SKIPPED, 0 FAILED Besides, I also did some stress test similar to the patch #2 in this series, as follows (with CONFIG_PROVE_RCU_LIST enabled): - Continuously mounting and unmounting named cgroups in some tasks, for example: cgrp_name=$1 while true do mount -t cgroup -o none,name=$cgrp_name none /$cgrp_name umount /$cgrp_name done - Continuously run this selftest concurrently, while true; do ./test_progs --name=cgroup1_hierarchy; done They can ran successfully without any RCU warnings in dmesg. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-7-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cgroup1_hierarchy.c | 158 +++++++++++++++++++++ .../selftests/bpf/progs/test_cgroup1_hierarchy.c | 71 +++++++++ 2 files changed, 229 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c create mode 100644 tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 000000000000..74d6d7546f40 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include +#include +#include +#include "cgroup_helpers.h" +#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 000000000000..44628865fe1d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include "vmlinux.h" +#include +#include +#include + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + bpf_cgroup_release(cgrp); + return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From b539deafbadb2fc6ba79307a797196454b14f501 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 10 Nov 2023 12:09:08 +0100 Subject: perf report: Add s390 raw data interpretation for PAI counters Commit 39d62336f5c126ad ("s390/pai: add support for cryptography counters") added support for Processor Activity Instrumentation Facility (PAI) counters. These counters values are added as raw data with the perf sample during 'perf record'. Now add support to display these counters in the 'perf report' command. The counter number, its assigned name and value is now printed in addition to the hexadecimal output. Output before: # perf report -D 6 514766399626050 0x7b058 [0x48]: PERF_RECORD_SAMPLE(IP, 0x1): 303977/303977: 0 period: 1 addr: 0 ... thread: paitest:303977 ...... dso: 0x7b0a0@/root/perf.data.paicrypto [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 00 00 00 09 00 01 00 48 00 00 00 00 00 00 00 00 .......H........ . 0010: 00 04 a3 69 00 04 a3 69 00 01 d4 2d 76 de a0 bb ...i...i...-v... . 0020: 00 00 00 00 00 01 5c 53 00 00 00 06 00 00 00 00 ......\S........ . 0030: 00 00 00 00 00 00 00 01 00 00 00 0c 00 07 00 00 ................ . 0040: 00 00 00 53 96 af 00 00 ...S.... Output after: # perf report -D 6 514766399626050 0x7b058 [0x48]: PERF_RECORD_SAMPLE(IP, 0x1): 303977/303977: 0 period: 1 addr: 0 ... thread: paitest:303977 ...... dso: 0x7b0a0@/root/perf.data.paicrypto [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 00 00 00 09 00 01 00 48 00 00 00 00 00 00 00 00 .......H........ . 0010: 00 04 a3 69 00 04 a3 69 00 01 d4 2d 76 de a0 bb ...i...i...-v... . 0020: 00 00 00 00 00 01 5c 53 00 00 00 06 00 00 00 00 ......\S........ . 0030: 00 00 00 00 00 00 00 01 00 00 00 0c 00 07 00 00 ................ . 0040: 00 00 00 53 96 af 00 00 ...S.... Counter:007 km_aes_128 Value:0x00000000005396af <--- new Committer notes: Had to add ignore pragmas for that __packed function: +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" Otherwise this doesn't build in things like debian experimentao cross building to mips64, etc. Signed-off-by: Thomas Richter Tested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20231110110908.2312308-1-tmricht@linux.ibm.com [ Corrected non-existent commit referred to the right one: 39d62336f5c126ad ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-cpumcf-kernel.h | 2 + tools/perf/util/s390-sample-raw.c | 109 ++++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index f55ca07f3ca1..74b36644e384 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -12,6 +12,8 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ +#define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 115b16edb451..29a744eeb71e 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -125,6 +125,9 @@ static int get_counterset_start(int setnr) return 128; case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ return 448; + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ + return setnr; default: return -1; } @@ -212,27 +215,117 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" +/* + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. + */ +struct pai_data { /* Event number and value */ + u16 event_nr; + u64 event_val; +} __packed; + +#pragma GCC diagnostic pop + +/* + * Test for valid raw data. At least one PAI event should be in the raw + * data section. + */ +static bool s390_pai_all_test(struct perf_sample *sample) +{ + unsigned char *buf = sample->raw_data; + size_t len = sample->raw_size; + + if (len < 0xa || !buf) + return false; + return true; +} + +static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *p = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct pai_data pai_data; + char *ev_name; + + while (offset < len) { + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); + p += sizeof(pai_data.event_nr); + offset += sizeof(pai_data.event_nr); + + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); + pai_data.event_val = be64_to_cpu(pai_data.event_val); + p += sizeof(pai_data.event_val); + offset += sizeof(pai_data.event_val); + + ev_name = get_counter_name(evsel->core.attr.config, + pai_data.event_nr, evsel->pmu); + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + pai_data.event_nr, ev_name ?: "", + pai_data.event_val); + free(ev_name); + + if (offset + 0xa > len) + break; + } + color_fprintf(stdout, color, "\n"); +} + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events - * and if the event was triggered by a counter set diagnostic event display - * its raw data. + * and if the event was triggered by a + * - counter set diagnostic event + * - processor activity assist (PAI) crypto counter event + * - processor activity assist (PAI) neural network processor assist (NNPA) + * counter event + * display its raw data. * The function is only invoked when the dump flag -D is set. + * + * Function evlist__s390_sample_raw() is defined as call back after it has + * been verified that the perf.data file was created on s390 platform. */ -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) { + const char *pai_name; struct evsel *evsel; if (event->header.type != PERF_RECORD_SAMPLE) return; evsel = evlist__event2evsel(evlist, event); - if (evsel == NULL || - evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) + if (!evsel) return; /* Display raw data on screen */ - if (!s390_cpumcfdg_testctr(sample)) { - pr_err("Invalid counter set data encountered\n"); + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find("cpum_cf"); + if (!s390_cpumcfdg_testctr(sample)) + pr_err("Invalid counter set data encountered\n"); + else + s390_cpumcfdg_dump(evsel->pmu, sample); return; } - s390_cpumcfdg_dump(evsel->pmu, sample); + + switch (evsel->core.attr.config) { + case PERF_EVENT_PAI_NNPA_ALL: + pai_name = "NNPA_ALL"; + break; + case PERF_EVENT_PAI_CRYPTO_ALL: + pai_name = "CRYPTO_ALL"; + break; + default: + return; + } + + if (!s390_pai_all_test(sample)) { + pr_err("Invalid %s raw data encountered\n", pai_name); + } else { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); + s390_pai_all_dump(evsel, sample); + } } -- cgit v1.2.3 From acbf6de674ef7b1b5870b25e7b3c695bf84273d0 Mon Sep 17 00:00:00 2001 From: Ji Sheng Teoh Date: Fri, 3 Nov 2023 16:24:41 +0800 Subject: perf vendor events riscv: Add StarFive Dubhe-80 JSON file StarFive's Dubhe-80 supports raw event id 0x00 - 0x22. The raw events are enabled through PMU node of DT binding. Besides raw event, add standard RISC-V firmware events to support monitoring of firmware event. Example of PMU DT node: pmu { compatible = "riscv,pmu"; riscv,raw-event-to-mhpmcounters = /* Event ID 1-31 */ <0x00 0x00 0xFFFFFFFF 0xFFFFFFE0 0x00007FF8>, /* Event ID 32-33 */ <0x00 0x20 0xFFFFFFFF 0xFFFFFFFE 0x00007FF8>, /* Event ID 34 */ <0x00 0x22 0xFFFFFFFF 0xFFFFFF22 0x00007FF8>; }; Example of 'perf stat' output: [root@user]# perf stat -a \ -e access_mmu_stlb \ -e miss_mmu_stlb \ -e access_mmu_pte_c \ -e rob_flush \ -e btb_prediction_miss \ -e itlb_miss \ -e sync_del_fetch_g \ -e icache_miss \ -e bpu_br_retire \ -e bpu_br_miss \ -e ret_ins_retire \ -e ret_ins_miss \ -- openssl speed rsa2048 Doing 2048 bits private rsa's for 10s: 39 2048 bits private RSA's in 10.14s Doing 2048 bits public rsa's for 10s: 1563 2048 bits public RSA's in 10.00s version: 3.0.11 built on: Tue Sep 19 13:02:31 2023 UTC options: bn(64,64) CPUINFO: N/A sign verify sign/s verify/s rsa 2048 bits 0.260000s 0.006398s 3.8 156.3 Performance counter stats for 'system wide': 1338350 access_mmu_stlb 1154025 miss_mmu_stlb 1162691 access_mmu_pte_c 34067 rob_flush 11212384 btb_prediction_miss 1256242 itlb_miss 652523491 sync_del_fetch_g 384465 icache_miss 64635789 bpu_br_retire 323440 bpu_br_miss 8785143 ret_ins_retire 31236 ret_ins_miss 20.760822480 seconds time elapsed Reviewed-by: Ian Rogers Signed-off-by: Ji Sheng Teoh Cc: Adrian Hunter Cc: Albert Ou Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Ley Foon Tan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nikita Shubin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/20231103082441.1389842-1-jisheng.teoh@starfivetech.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/riscv/mapfile.csv | 1 + .../arch/riscv/starfive/dubhe-80/common.json | 172 +++++++++++++++++++++ .../arch/riscv/starfive/dubhe-80/firmware.json | 68 ++++++++ 3 files changed, 241 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json create mode 100644 tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv index c61b3d6ef616..ee61e26f90cd 100644 --- a/tools/perf/pmu-events/arch/riscv/mapfile.csv +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -15,3 +15,4 @@ # #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core +0x67e-0x80000000db000080-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json new file mode 100644 index 000000000000..fbffcacb2ace --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/common.json @@ -0,0 +1,172 @@ +[ + { + "EventName": "ACCESS_MMU_STLB", + "EventCode": "0x1", + "BriefDescription": "access MMU STLB" + }, + { + "EventName": "MISS_MMU_STLB", + "EventCode": "0x2", + "BriefDescription": "miss MMU STLB" + }, + { + "EventName": "ACCESS_MMU_PTE_C", + "EventCode": "0x3", + "BriefDescription": "access MMU PTE-Cache" + }, + { + "EventName": "MISS_MMU_PTE_C", + "EventCode": "0x4", + "BriefDescription": "miss MMU PTE-Cache" + }, + { + "EventName": "ROB_FLUSH", + "EventCode": "0x5", + "BriefDescription": "ROB flush (all kinds of exceptions)" + }, + { + "EventName": "BTB_PREDICTION_MISS", + "EventCode": "0x6", + "BriefDescription": "BTB prediction miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x7", + "BriefDescription": "ITLB miss" + }, + { + "EventName": "SYNC_DEL_FETCH_G", + "EventCode": "0x8", + "BriefDescription": "SYNC delivery a fetch-group" + }, + { + "EventName": "ICACHE_MISS", + "EventCode": "0x9", + "BriefDescription": "ICache miss" + }, + { + "EventName": "BPU_BR_RETIRE", + "EventCode": "0xA", + "BriefDescription": "condition branch instruction retire" + }, + { + "EventName": "BPU_BR_MISS", + "EventCode": "0xB", + "BriefDescription": "condition branch instruction miss" + }, + { + "EventName": "RET_INS_RETIRE", + "EventCode": "0xC", + "BriefDescription": "return instruction retire" + }, + { + "EventName": "RET_INS_MISS", + "EventCode": "0xD", + "BriefDescription": "return instruction miss" + }, + { + "EventName": "INDIRECT_JR_MISS", + "EventCode": "0xE", + "BriefDescription": "indirect JR instruction miss (inlcude without target)" + }, + { + "EventName": "IBUF_VAL_ID_NORDY", + "EventCode": "0xF", + "BriefDescription": "IBUF valid while ID not ready" + }, + { + "EventName": "IBUF_NOVAL_ID_RDY", + "EventCode": "0x10", + "BriefDescription": "IBUF not valid while ID ready" + }, + { + "EventName": "REN_INT_PHY_REG_NORDY", + "EventCode": "0x11", + "BriefDescription": "REN integer physical register file is not ready" + }, + { + "EventName": "REN_FP_PHY_REG_NORDY", + "EventCode": "0x12", + "BriefDescription": "REN floating point physical register file is not ready" + }, + { + "EventName": "REN_CP_NORDY", + "EventCode": "0x13", + "BriefDescription": "REN checkpoint is not ready" + }, + { + "EventName": "DEC_VAL_ROB_NORDY", + "EventCode": "0x14", + "BriefDescription": "DEC is valid and ROB is not ready" + }, + { + "EventName": "OOD_FLUSH_LS_DEP", + "EventCode": "0x15", + "BriefDescription": "out of order flush due to load/store dependency" + }, + { + "EventName": "BRU_RET_IJR_INS", + "EventCode": "0x16", + "BriefDescription": "BRU retire an IJR instruction" + }, + { + "EventName": "ACCESS_DTLB", + "EventCode": "0x17", + "BriefDescription": "access DTLB" + }, + { + "EventName": "MISS_DTLB", + "EventCode": "0x18", + "BriefDescription": "miss DTLB" + }, + { + "EventName": "LOAD_INS_DCACHE", + "EventCode": "0x19", + "BriefDescription": "load instruction access DCache" + }, + { + "EventName": "LOAD_INS_MISS_DCACHE", + "EventCode": "0x1A", + "BriefDescription": "load instruction miss DCache" + }, + { + "EventName": "STORE_INS_DCACHE", + "EventCode": "0x1B", + "BriefDescription": "store/amo instruction access DCache" + }, + { + "EventName": "STORE_INS_MISS_DCACHE", + "EventCode": "0x1C", + "BriefDescription": "store/amo instruction miss DCache" + }, + { + "EventName": "LOAD_SCACHE", + "EventCode": "0x1D", + "BriefDescription": "load access SCache" + }, + { + "EventName": "STORE_SCACHE", + "EventCode": "0x1E", + "BriefDescription": "store access SCache" + }, + { + "EventName": "LOAD_MISS_SCACHE", + "EventCode": "0x1F", + "BriefDescription": "load miss SCache" + }, + { + "EventName": "STORE_MISS_SCACHE", + "EventCode": "0x20", + "BriefDescription": "store miss SCache" + }, + { + "EventName": "L2C_PF_REQ", + "EventCode": "0x21", + "BriefDescription": "L2C data-prefetcher request" + }, + { + "EventName": "L2C_PF_HIT", + "EventCode": "0x22", + "BriefDescription": "L2C data-prefetcher hit" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] -- cgit v1.2.3 From 5f99f312bd3bedb3b266b0d26376a8c500cdc97f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:00 -0800 Subject: bpf: add register bounds sanity checks and sanitization Add simple sanity checks that validate well-formed ranges (min <= max) across u64, s64, u32, and s32 ranges. Also for cases when the value is constant (either 64-bit or 32-bit), we validate that ranges and tnums are in agreement. These bounds checks are performed at the end of BPF_ALU/BPF_ALU64 operations, on conditional jumps, and for LDX instructions (where subreg zero/sign extension is probably the most important to check). This covers most of the interesting cases. Also, we validate the sanity of the return register when manually adjusting it for some special helpers. By default, sanity violation will trigger a warning in verifier log and resetting register bounds to "unbounded" ones. But to aid development and debugging, BPF_F_TEST_SANITY_STRICT flag is added, which will trigger hard failure of verification with -EFAULT on register bounds violations. This allows selftests to catch such issues. veristat will also gain a CLI option to enable this behavior. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231112010609.848406-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 3 ++ kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 117 ++++++++++++++++++++++++++++++++--------- tools/include/uapi/linux/bpf.h | 3 ++ 5 files changed, 101 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 24213a99cc79..402b6bc44a1b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -602,6 +602,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_sanity_strict; /* fail verification on sanity violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7cf8bcf9f6a2..8a5855fcee69 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_SANITY_STRICT (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0ed286b8a0f0..f266e03ba342 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2573,7 +2573,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | - BPF_F_XDP_DEV_BOUND_ONLY)) + BPF_F_XDP_DEV_BOUND_ONLY | + BPF_F_TEST_SANITY_STRICT)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 65570eedfe88..e7edacf86e0f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2615,6 +2615,56 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); } +static int reg_bounds_sanity_check(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, const char *ctx) +{ + const char *msg; + + if (reg->umin_value > reg->umax_value || + reg->smin_value > reg->smax_value || + reg->u32_min_value > reg->u32_max_value || + reg->s32_min_value > reg->s32_max_value) { + msg = "range bounds violation"; + goto out; + } + + if (tnum_is_const(reg->var_off)) { + u64 uval = reg->var_off.value; + s64 sval = (s64)uval; + + if (reg->umin_value != uval || reg->umax_value != uval || + reg->smin_value != sval || reg->smax_value != sval) { + msg = "const tnum out of sync with range bounds"; + goto out; + } + } + + if (tnum_subreg_is_const(reg->var_off)) { + u32 uval32 = tnum_subreg(reg->var_off).value; + s32 sval32 = (s32)uval32; + + if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 || + reg->s32_min_value != sval32 || reg->s32_max_value != sval32) { + msg = "const subreg tnum out of sync with range bounds"; + goto out; + } + } + + return 0; +out: + verbose(env, "REG SANITY VIOLATION (%s): %s u64=[%#llx, %#llx] " + "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", + ctx, msg, reg->umin_value, reg->umax_value, + reg->smin_value, reg->smax_value, + reg->u32_min_value, reg->u32_max_value, + reg->s32_min_value, reg->s32_max_value, + reg->var_off.value, reg->var_off.mask); + if (env->test_sanity_strict) + return -EFAULT; + __mark_reg_unbounded(reg); + return 0; +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -9982,14 +10032,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } -static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, - int func_id, - struct bpf_call_arg_meta *meta) +static int do_refine_retval_range(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, int ret_type, + int func_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; if (ret_type != RET_INTEGER) - return; + return 0; switch (func_id) { case BPF_FUNC_get_stack: @@ -10015,6 +10066,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, reg_bounds_sync(ret_reg); break; } + + return reg_bounds_sanity_check(env, ret_reg, "retval"); } static int @@ -10666,7 +10719,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].ref_obj_id = id; } - do_refine_retval_range(regs, fn->ret_type, func_id, &meta); + err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); + if (err) + return err; err = check_map_func_compatibility(env, meta.map_ptr, func_id); if (err) @@ -14166,13 +14221,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* check dest operand */ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_reg_min_max_vals(env, insn); if (err) return err; - - return adjust_reg_min_max_vals(env, insn); } - return 0; + return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); } static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, @@ -14653,18 +14707,21 @@ again: * Technically we can do similar adjustments for pointers to the same object, * but we don't support that right now. */ -static void reg_set_min_max(struct bpf_reg_state *true_reg1, - struct bpf_reg_state *true_reg2, - struct bpf_reg_state *false_reg1, - struct bpf_reg_state *false_reg2, - u8 opcode, bool is_jmp32) +static int reg_set_min_max(struct bpf_verifier_env *env, + struct bpf_reg_state *true_reg1, + struct bpf_reg_state *true_reg2, + struct bpf_reg_state *false_reg1, + struct bpf_reg_state *false_reg2, + u8 opcode, bool is_jmp32) { + int err; + /* If either register is a pointer, we can't learn anything about its * variable offset from the compare (unless they were a pointer into * the same object, but we don't bother with that). */ if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE) - return; + return 0; /* fallthrough (FALSE) branch */ regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32); @@ -14675,6 +14732,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg1, regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32); reg_bounds_sync(true_reg1); reg_bounds_sync(true_reg2); + + err = reg_bounds_sanity_check(env, true_reg1, "true_reg1"); + err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2"); + err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1"); + err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2"); + return err; } static void mark_ptr_or_null_reg(struct bpf_func_state *state, @@ -14968,15 +15031,20 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, other_branch_regs = other_branch->frame[other_branch->curframe]->regs; if (BPF_SRC(insn->code) == BPF_X) { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - &other_branch_regs[insn->src_reg], - dst_reg, src_reg, opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + &other_branch_regs[insn->src_reg], + dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, - opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + src_reg /* fake one */, + dst_reg, src_reg /* same fake one */, + opcode, is_jmp32); } + if (err) + return err; + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id && !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { @@ -17479,10 +17547,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, false, BPF_MODE(insn->code) == BPF_MEMSX); - if (err) - return err; - - err = save_aux_ptr_type(env, src_reg_type, true); + err = err ?: save_aux_ptr_type(env, src_reg_type, true); + err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx"); if (err) return err; } else if (class == BPF_STX) { @@ -20769,6 +20835,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; + env->test_sanity_strict = attr->prog_flags & BPF_F_TEST_SANITY_STRICT; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7cf8bcf9f6a2..8a5855fcee69 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_SANITY_STRICT (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ -- cgit v1.2.3 From 8863238993e23ccc6d5a9d4ff9f1c043f88f692e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:03 -0800 Subject: selftests/bpf: BPF register range bounds tester Add test to validate BPF verifier's register range bounds tracking logic. The main bulk is a lot of auto-generated tests based on a small set of seed values for lower and upper 32 bits of full 64-bit values. Currently we validate only range vs const comparisons, but the idea is to start validating range over range comparisons in subsequent patch set. When setting up initial register ranges we treat registers as one of u64/s64/u32/s32 numeric types, and then independently perform conditional comparisons based on a potentially different u64/s64/u32/s32 types. This tests lots of tricky cases of deriving bounds information across different numeric domains. Given there are lots of auto-generated cases, we guard them behind SLOW_TESTS=1 envvar requirement, and skip them altogether otherwise. With current full set of upper/lower seed value, all supported comparison operators and all the combinations of u64/s64/u32/s32 number domains, we get about 7.7 million tests, which run in about 35 minutes on my local qemu instance without parallelization. But we also split those tests by init/cond numeric types, which allows to rely on test_progs's parallelization of tests with `-j` option, getting run time down to about 5 minutes on 8 cores. It's still something that shouldn't be run during normal test_progs run. But we can run it a reasonable time, and so perhaps a nightly CI test run (once we have it) would be a good option for this. We also add a small set of tricky conditions that came up during development and triggered various bugs or corner cases in either selftest's reimplementation of range bounds logic or in verifier's logic itself. These are fast enough to be run as part of normal test_progs test run and are great for a quick sanity checking. Let's take a look at test output to understand what's going on: $ sudo ./test_progs -t reg_bounds_crafted #191/1 reg_bounds_crafted/(u64)[0; 0xffffffff] (u64)< 0:OK ... #191/115 reg_bounds_crafted/(u64)[0; 0x17fffffff] (s32)< 0:OK ... #191/137 reg_bounds_crafted/(u64)[0xffffffff; 0x100000000] (u64)== 0:OK Each test case is uniquely and fully described by this generated string. E.g.: "(u64)[0; 0x17fffffff] (s32)< 0". This means that we initialize a register (R6) in such a way that verifier knows that it can have a value in [(u64)0; (u64)0x17fffffff] range. Another register (R7) is also set up as u64, but this time a constant (zero in this case). They then are compared using 32-bit signed < operation. Resulting TRUE/FALSE branches are evaluated (including cases where it's known that one of the branches will never be taken, in which case we validate that verifier also determines this as a dead code). Test validates that verifier's final register state matches expected state based on selftest's own reg_state logic, implemented from scratch for cross-checking purposes. These test names can be conveniently used for further debugging, and if -vv verboseness is requested we can get a corresponding verifier log (with mark_precise logs filtered out as irrelevant and distracting). Example below is slightly redacted for brevity, omitting irrelevant register output in some places, marked with [...]. $ sudo ./test_progs -a 'reg_bounds_crafted/(u32)[0; U32_MAX] (s32)< -1' -vv ... VERIFIER LOG: ======================== func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (05) goto pc+2 3: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 4: (bc) w6 = w0 ; R0_w=scalar() R6_w=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 5: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 6: (bc) w7 = w0 ; R0_w=scalar() R7_w=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 7: (b4) w1 = 0 ; R1_w=0 8: (b4) w2 = -1 ; R2=4294967295 9: (ae) if w6 < w1 goto pc-9 9: R1=0 R6=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 10: (2e) if w6 > w2 goto pc-10 10: R2=4294967295 R6=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 11: (b4) w1 = -1 ; R1_w=4294967295 12: (b4) w2 = -1 ; R2_w=4294967295 13: (ae) if w7 < w1 goto pc-13 ; R1_w=4294967295 R7=4294967295 14: (2e) if w7 > w2 goto pc-14 14: R2_w=4294967295 R7=4294967295 15: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 16: (bc) w0 = w7 ; [...] R7=4294967295 17: (ce) if w6 s< w7 goto pc+3 ; R6=scalar(id=1,smin=0,smax=umax=4294967295,smin32=-1,var_off=(0x0; 0xffffffff)) R7=4294967295 18: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=0,smax=umax=4294967295,smin32=-1,var_off=(0x0; 0xffffffff)) 19: (bc) w0 = w7 ; [...] R7=4294967295 20: (95) exit from 17 to 21: [...] 21: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=umin=umin32=2147483648,smax=umax=umax32=4294967294,smax32=-2,var_off=(0x80000000; 0x7fffffff)) 22: (bc) w0 = w7 ; [...] R7=4294967295 23: (95) exit from 13 to 1: [...] 1: [...] 1: (b7) r0 = 0 ; R0_w=0 2: (95) exit processed 24 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1 ===================== Verifier log above is for `(u32)[0; U32_MAX] (s32)< -1` use cases, where u32 range is used for initialization, followed by signed < operator. Note how we use w6/w7 in this case for register initialization (it would be R6/R7 for 64-bit types) and then `if w6 s< w7` for comparison at instruction #17. It will be `if R6 < R7` for 64-bit unsigned comparison. Above example gives a good impression of the overall structure of a BPF programs generated for reg_bounds tests. In the future, this "framework" can be extended to test not just conditional jumps, but also arithmetic operations. Adding randomized testing is another possibility. Some implementation notes. We basically have our own generics-like operations on numbers, where all the numbers are stored in u64, but how they are interpreted is passed as runtime argument enum num_t. Further, `struct range` represents a bounds range, and those are collected together into a minimal `struct reg_state`, which collects range bounds across all four numberical domains: u64, s64, u32, s64. Based on these primitives and `enum op` representing possible conditional operation (<, <=, >, >=, ==, !=), there is a set of generic helpers to perform "range arithmetics", which is used to maintain struct reg_state. We simulate what verifier will do for reg bounds of R6 and R7 registers using these range and reg_state primitives. Simulated information is used to determine branch taken conclusion and expected exact register state across all four number domains. Implementation of "range arithmetics" is more generic than what verifier is currently performing: it allows range over range comparisons and adjustments. This is the intended end goal of this patch set overall and verifier logic is enhanced in subsequent patches in this series to handle range vs range operations, at which point selftests are extended to validate these conditions as well. For now it's range vs const cases only. Note that tests are split into multiple groups by their numeric types for initialization of ranges and for comparison operation. This allows to use test_progs's -j parallelization to speed up tests, as we now have 16 groups of parallel running tests. Overall reduction of running time that allows is pretty good, we go down from more than 30 minutes to slightly less than 5 minutes running time. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231112010609.848406-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 1838 ++++++++++++++++++++ 1 file changed, 1838 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/reg_bounds.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 000000000000..7a524b381ed3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,1838 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? (s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: printf("t_unsigned!\n"); exit(1); + } +} + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= 256; + case U32: return (u32)x <= 256; + case S64: return (s64)x >= -256 && (s64)x <= 256; + case S32: return (s32)x >= -256 && (s32)x <= 256; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b = x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. + */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: + return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving + * all possible information. Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. + */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + break; + + /* below extended logic is not supported by verifier just yet */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + if (br_u >= 0) + return br_u; + return br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. + */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 r7 goto +3; | if w6 w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assummed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assummed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. + */ + struct { + const char *pfx; + const char *fmt; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", "%lld", ®->r[S64].a, S64_MIN}, + {"smax=", "%lld", ®->r[S64].b, S64_MAX}, + {"umin=", "%llu", ®->r[U64].a, 0}, + {"umax=", "%llu", ®->r[U64].b, U64_MAX}, + {"smin32=", "%lld", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", "%lld", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", "%llu", ®->r[U32].a, 0, true}, + {"umax32=", "%llu", ®->r[U32].b, U32_MAX, true}, + }; + const char *p, *fmt; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + fmt = fields[midxs[0]].fmt; + if (sscanf(p, fmt, &val) != 1) + return -EINVAL; + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. + */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? "w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "" : "", + e->valid ? "" : ""); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + __u64 start_ns; + char progress_ctx[32]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), op_str(t->op)); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? -EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub); + if (!test__start_subtest(sb->buf)) + continue; + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + int err; + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)( x ) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)( x ) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If + * envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditiona numeric domains to get an easy parallelization with test_progs' + * -j argument. + */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. + */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} -- cgit v1.2.3 From 774f94c5e74d86d554c4fd1e97c517a1a7ee7fe0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:04 -0800 Subject: selftests/bpf: adjust OP_EQ/OP_NE handling to use subranges for branch taken Similar to kernel-side BPF verifier logic enhancements, use 32-bit subrange knowledge for is_branch_taken() logic in reg_bounds selftests. Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231112010609.848406-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 30 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 7a524b381ed3..10f3b6898274 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -748,16 +748,38 @@ static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct r /* OP_EQ and OP_NE are sign-agnostic */ enum num_t tu = t_unsigned(t); enum num_t ts = t_signed(t); - int br_u, br_s; + int br_u, br_s, br; br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); if (br_u >= 0 && br_s >= 0 && br_u != br_s) ASSERT_FALSE(true, "branch taken inconsistency!\n"); - if (br_u >= 0) - return br_u; - return br_s; + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE && br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; } return range_branch_taken_op(t, x->r[t], y->r[t], op); } -- cgit v1.2.3 From 2b0d204e368b306d4db894749947ed591b667ec5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:05 -0800 Subject: selftests/bpf: add range x range test to reg_bounds Now that verifier supports range vs range bounds adjustments, validate that by checking each generated range against every other generated range, across all supported operators (everything by JSET). We also add few cases that were problematic during development either for verifier or for selftest's range tracking implementation. Note that we utilize the same trick with splitting everything into multiple independent parallelizable tests, but init_t and cond_t. This brings down verification time in parallel mode from more than 8 hours down to less that 1.5 hours. 106 million cases were successfully validate for range vs range logic, in addition to about 7 million range vs const cases, added in earlier patch. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 86 ++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 10f3b6898274..5320fe5d9433 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -1760,6 +1760,60 @@ cleanup: cleanup_ctx(&ctx); } +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (MAX_OP - MIN_OP + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* ( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + /* Go over thousands of test cases generated from initial seed values. * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If * envvar is not set, this test is skipped during test_progs testing. @@ -1790,6 +1844,27 @@ void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + /* A set of hard-coded "interesting" cases to validate as part of normal * test_progs test runs */ @@ -1803,6 +1878,12 @@ static struct subtest_case crafted_cases[] = { {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, @@ -1837,6 +1918,11 @@ static struct subtest_case crafted_cases[] = { {U32, U32, {1, U32_MAX}, {0, 0}}, {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3 From dab16659c50e8c9c7c5d9584beacec28c769dcca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:06 -0800 Subject: selftests/bpf: add randomized reg_bounds tests Add random cases generation to reg_bounds.c and run them without SLOW_TESTS=1 to increase a chance of BPF CI catching latent issues. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 166 ++++++++++++++++++++- 1 file changed, 159 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 5320fe5d9433..f3f724062b35 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -83,6 +83,17 @@ static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) } } +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + static const char *t_str(enum num_t t) { switch (t) { @@ -1312,8 +1323,10 @@ struct ctx { struct range *usubranges, *ssubranges; int max_failure_cnt, cur_failure_cnt; int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; __u64 start_ns; - char progress_ctx[32]; + char progress_ctx[64]; }; static void cleanup_ctx(struct ctx *ctx) @@ -1644,11 +1657,6 @@ static int parse_env_vars(struct ctx *ctx) { const char *s; - if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { - test__skip(); - return -ENOTSUP; - } - if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { errno = 0; ctx->max_failure_cnt = strtol(s, NULL, 10); @@ -1658,13 +1666,37 @@ static int parse_env_vars(struct ctx *ctx) } } + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + return 0; } static int prepare_gen_tests(struct ctx *ctx) { + const char *s; int err; + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + err = parse_env_vars(ctx); if (err) return err; @@ -1794,7 +1826,7 @@ static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) exit(1); } - ctx.total_case_cnt = (MAX_OP - MIN_OP + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); ctx.start_ns = get_time_ns(); snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), "RANGE x RANGE, %s -> %s", @@ -1865,6 +1897,126 @@ void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } +#define DEFAULT_RAND_CASE_CNT 25 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use rougly equal split: 22 + 21 + 21 bits + */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + fprintf(env.stdout, "%s\n", ctx.progress_ctx); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* x */ + if (verify_case(&ctx, init_t, cond_t, range1, range2)) + goto cleanup; + /* x */ + if (verify_case(&ctx, init_t, cond_t, range2, range1)) + goto cleanup; + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + /* A set of hard-coded "interesting" cases to validate as part of normal * test_progs test runs */ -- cgit v1.2.3 From 8c5677f8b31e92b57be7d5d0fbb1ac66eedf4f91 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:07 -0800 Subject: selftests/bpf: set BPF_F_TEST_SANITY_SCRIPT by default Make sure to set BPF_F_TEST_SANITY_STRICT program flag by default across most verifier tests (and a bunch of others that set custom prog flags). There are currently two tests that do fail validation, if enforced strictly: verifier_bounds/crossing_64_bit_signed_boundary_2 and verifier_bounds/crossing_32_bit_signed_boundary_2. To accommodate them, we teach test_loader a flag negation: __flag(!) will *clear* specified flag, allowing easy opt-out. We apply __flag(!BPF_F_TEST_SANITY_STRICT) to these to tests. Also sprinkle BPF_F_TEST_SANITY_STRICT everywhere where we already set test-only BPF_F_TEST_RND_HI32 flag, for completeness. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_verif_scale.c | 2 +- .../testing/selftests/bpf/progs/verifier_bounds.c | 2 ++ tools/testing/selftests/bpf/test_loader.c | 35 ++++++++++++++++------ tools/testing/selftests/bpf/test_sock_addr.c | 1 + tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/testing_helpers.c | 4 +-- 6 files changed, 33 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897d8..3f2d70831873 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe2e..0c1460936373 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28a1..57e27b1a73a6 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. @@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_SANITY_STRICT; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_SANITY_STRICT") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_SANITY_STRICT, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if (err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } } } diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc62c..878c077e0fa7 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -680,6 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_SANITY_STRICT); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452d3..4992022f3137 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..9786a94a666c 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, -- cgit v1.2.3 From a5c57f81eb2b5d6de4f46e47fd85be50d179bfd8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:08 -0800 Subject: veristat: add ability to set BPF_F_TEST_SANITY_STRICT flag with -r flag Add a new flag -r (--test-sanity), similar to -t (--test-states), to add extra BPF program flags when loading BPF programs. This allows to use veristat to easily catch sanity violations in production BPF programs. reg_bounds tests are also enforcing BPF_F_TEST_SANITY_STRICT flag now. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 1 + tools/testing/selftests/bpf/veristat.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index f3f724062b35..fe0cb906644b 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -838,6 +838,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, + .prog_flags = BPF_F_TEST_SANITY_STRICT, ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 443a29fc6a62..609fd9753af0 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -145,6 +145,7 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool strict_range_sanity; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -214,8 +215,6 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, @@ -224,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-sanity", 'r', NULL, 0, + "Force strict BPF verifier register sanity behavior (BPF_F_TEST_SANITY_STRICT program flag)" }, {}, }; @@ -295,6 +298,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.strict_range_sanity = true; + break; case 'n': errno = 0; env.top_n = strtol(arg, NULL, 10); @@ -302,7 +308,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } - break; case 'C': env.comparison_mode = true; break; @@ -1023,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.strict_range_sanity) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_SANITY_STRICT); err = bpf_object__load(obj); env.progs_processed++; -- cgit v1.2.3 From 882e3d873c2d8a2aebbc6c192aa1a2990b9d5b27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:09 -0800 Subject: selftests/bpf: add iter test requiring range x range logic Add a simple verifier test that requires deriving reg bounds for one register from another register that's not a constant. This is a realistic example of iterating elements of an array with fixed maximum number of elements, but smaller actual number of elements. This small example was an original motivation for doing this whole patch set in the first place, yes. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index c20c4e38b71c..b2181f850d3e 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void) ); } +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.n) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 9ffa01cab0699476be12ed4917c7d282cedc5ddf Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:39 -0300 Subject: selftests: tc-testing: drop '-N' argument from nsPlugin This argument would bypass the net namespace creation and run the test in the root namespace, even if nsPlugin was specified. Drop it as it's the same as commenting out the nsPlugin from a test and adds additional complexity to the plugin code. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 49 ++++++---------------- 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index b62429b0fcdb..2297b4568ca9 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -28,10 +28,7 @@ def prepare_suite(obj, test): shadow['DEV2'] = original['DEV2'] obj.args.NAMES = shadow - if obj.args.namespace: - obj._ns_create() - else: - obj._ports_create() + obj._ns_create() # Make sure the netns is visible in the fs while True: @@ -75,10 +72,7 @@ class SubPlugin(TdcPlugin): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - if self.args.namespace: - self._ns_destroy() - else: - self._ports_destroy() + self._ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -93,24 +87,11 @@ class SubPlugin(TdcPlugin): subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - def add_args(self, parser): - super().add_args(parser) - self.argparser_group = self.argparser.add_argument_group( - 'netns', - 'options for nsPlugin(run commands in net namespace)') - self.argparser_group.add_argument( - '-N', '--no-namespace', action='store_false', default=True, - dest='namespace', help='Don\'t run commands in namespace') - return self.argparser - def adjust_command(self, stage, command): super().adjust_command(stage, command) cmdform = 'list' cmdlist = list() - if not self.args.namespace: - return command - if self.args.verbose: print('{}.adjust_command'.format(self.sub_class)) @@ -144,8 +125,6 @@ class SubPlugin(TdcPlugin): cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) cmds.append(self._replace_keywords('link set $DEV0 up')) cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - if not self.args.namespace: - cmds.append(self._replace_keywords('link set $DEV1 up')) return cmds @@ -161,18 +140,17 @@ class SubPlugin(TdcPlugin): def _ns_create_cmds(self): cmds = [] - if self.args.namespace: - ns = self.args.NAMES['NS'] + ns = self.args.NAMES['NS'] - cmds.append(self._replace_keywords('netns add {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) - if self.args.device: - cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) + if self.args.device: + cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) return cmds @@ -192,9 +170,8 @@ class SubPlugin(TdcPlugin): Destroy the network namespace for testing (and any associated network devices as well) ''' - if self.args.namespace: - self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() + self._exec_cmd('post', self._ns_destroy_cmd()) + self._ports_destroy() @cached_property def _proc(self): -- cgit v1.2.3 From fa63d353ddfb8a2d7688220a45b84e1507d211cf Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:40 -0300 Subject: selftests: tc-testing: rework namespaces and devices setup As mentioned in the TC Workshop 0x17, our recent changes to tdc broke downstream CI systems like tuxsuite. The issue is the classic problem with rcu/workqueue objects where you can miss them if not enough wall time has passed. The latter is subjective to the system and kernel config, in my machine could be nanoseconds while in another could be microseconds or more. In order to make the suite deterministic, poll for the existence of the objects in a reasonable manner. Talking netlink directly is the the best solution in order to avoid paying the cost of multiple 'fork()' calls, so introduce a netlink based setup routine using pyroute2. We leave the iproute2 one as a fallback when pyroute2 is not available. Also rework the iproute2 side to mimic the netlink routine where it creates DEV0 as the peer of DEV1 and moves DEV1 into the net namespace. This way when the namespace is deleted DEV0 is also deleted automatically, leaving no margin for resource leaks. Another bonus of this change is that our setup time sped up by a factor of 2 when using netlink. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 69 +++++++++++++++------- 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 2297b4568ca9..62974bd3a4a5 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -9,6 +9,14 @@ from TdcPlugin import TdcPlugin from tdc_config import * +try: + from pyroute2 import netns + from pyroute2 import IPRoute + netlink = True +except ImportError: + netlink = False + print("!!! Consider installing pyroute2 !!!") + def prepare_suite(obj, test): original = obj.args.NAMES @@ -28,7 +36,10 @@ def prepare_suite(obj, test): shadow['DEV2'] = original['DEV2'] obj.args.NAMES = shadow - obj._ns_create() + if netlink == True: + obj._nl_ns_create() + else: + obj._ns_create() # Make sure the netns is visible in the fs while True: @@ -67,7 +78,6 @@ class SubPlugin(TdcPlugin): if test_skip: return - def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) @@ -119,23 +129,41 @@ class SubPlugin(TdcPlugin): print('adjust_command: return command [{}]'.format(command)) return command - def _ports_create_cmds(self): - cmds = [] + def _nl_ns_create(self): + ns = self.args.NAMES["NS"]; + dev0 = self.args.NAMES["DEV0"]; + dev1 = self.args.NAMES["DEV1"]; + dummy = self.args.NAMES["DUMMY"]; - cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) - cmds.append(self._replace_keywords('link set $DEV0 up')) - cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - - return cmds - - def _ports_create(self): - self._exec_cmd_batched('pre', self._ports_create_cmds()) - - def _ports_destroy_cmd(self): - return self._replace_keywords('link del $DEV0') - - def _ports_destroy(self): - self._exec_cmd('post', self._ports_destroy_cmd()) + if self.args.verbose: + print('{}._nl_ns_create'.format(self.sub_class)) + + netns.create(ns) + netns.pushns(newns=ns) + with IPRoute() as ip: + ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) + ip.link('add', ifname=dummy, kind='dummy') + while True: + try: + dev1_idx = ip.link_lookup(ifname=dev1)[0] + dummy_idx = ip.link_lookup(ifname=dummy)[0] + ip.link('set', index=dev1_idx, state='up') + ip.link('set', index=dummy_idx, state='up') + break + except: + time.sleep(0.1) + continue + netns.popns() + + with IPRoute() as ip: + while True: + try: + dev0_idx = ip.link_lookup(ifname=dev0)[0] + ip.link('set', index=dev0_idx, state='up') + break + except: + time.sleep(0.1) + continue def _ns_create_cmds(self): cmds = [] @@ -143,10 +171,13 @@ class SubPlugin(TdcPlugin): ns = self.args.NAMES['NS'] cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0')) cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns))) cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns))) if self.args.device: cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) @@ -159,7 +190,6 @@ class SubPlugin(TdcPlugin): Create the network namespace in which the tests will be run and set up the required network devices for it. ''' - self._ports_create() self._exec_cmd_batched('pre', self._ns_create_cmds()) def _ns_destroy_cmd(self): @@ -171,7 +201,6 @@ class SubPlugin(TdcPlugin): devices as well) ''' self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() @cached_property def _proc(self): -- cgit v1.2.3 From bb9623c337f5d13b15d700127281c9607d1f8ec2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:41 -0300 Subject: selftests: tc-testing: preload all modules in kselftests While running tdc tests in parallel it can race over the module loading done by tc and fail the run with random errors. So avoid this by preloading all modules before running tdc in kselftests. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.sh | 65 ++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index eb357bd7923c..ae08b7a47c42 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,7 +1,68 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -modprobe netdevsim -modprobe sch_teql +# If a module is required and was not compiled +# the test that requires it will fail anyways +try_modprobe() { + modprobe -q -R "$1" + if [ $? -ne 0 ]; then + echo "Module $1 not found... skipping." + else + modprobe "$1" + fi +} + +try_modprobe netdevsim +try_modprobe act_bpf +try_modprobe act_connmark +try_modprobe act_csum +try_modprobe act_ct +try_modprobe act_ctinfo +try_modprobe act_gact +try_modprobe act_gate +try_modprobe act_ipt +try_modprobe act_mirred +try_modprobe act_mpls +try_modprobe act_nat +try_modprobe act_pedit +try_modprobe act_police +try_modprobe act_sample +try_modprobe act_simple +try_modprobe act_skbedit +try_modprobe act_skbmod +try_modprobe act_tunnel_key +try_modprobe act_vlan +try_modprobe cls_basic +try_modprobe cls_bpf +try_modprobe cls_cgroup +try_modprobe cls_flow +try_modprobe cls_flower +try_modprobe cls_fw +try_modprobe cls_matchall +try_modprobe cls_route +try_modprobe cls_u32 +try_modprobe em_canid +try_modprobe em_cmp +try_modprobe em_ipset +try_modprobe em_ipt +try_modprobe em_meta +try_modprobe em_nbyte +try_modprobe em_text +try_modprobe em_u32 +try_modprobe sch_cake +try_modprobe sch_cbs +try_modprobe sch_choke +try_modprobe sch_codel +try_modprobe sch_drr +try_modprobe sch_etf +try_modprobe sch_ets +try_modprobe sch_fq +try_modprobe sch_fq_codel +try_modprobe sch_fq_pie +try_modprobe sch_gred +try_modprobe sch_hfsc +try_modprobe sch_hhf +try_modprobe sch_htb +try_modprobe sch_teql ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc -- cgit v1.2.3 From 04fd47bf70f95533c2b8387c19c7e11c085945d2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:42 -0300 Subject: selftests: tc-testing: use parallel tdc in kselftests Leverage parallel tests in kselftests using all the available cpus. We tested this in tuxsuite and locally extensively and it seems it's ready for prime time. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index ae08b7a47c42..4dbe50bde5a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -64,5 +64,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -c actions --nobuildebpf -./tdc.py -c qdisc +./tdc.py -J`nproc` -c actions --nobuildebpf +./tdc.py -J`nproc` -c qdisc -- cgit v1.2.3 From 3bdd9fd29cb0f136b307559a19c107210ad5c314 Mon Sep 17 00:00:00 2001 From: Lucas Karpinski Date: Tue, 14 Nov 2023 10:11:31 -0500 Subject: selftests/net: synchronize udpgro tests' tx and rx connection The sockets used by udpgso_bench_tx aren't always ready when udpgso_bench_tx transmits packets. This issue is more prevalent in -rt kernels, but can occur in both. Replace the hacky sleep calls with a function that checks whether the ports in the namespace are ready for use. Suggested-by: Paolo Abeni Signed-off-by: Lucas Karpinski Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/net_helper.sh | 22 ++++++++++++++++++++++ tools/testing/selftests/net/udpgro.sh | 13 ++++++------- tools/testing/selftests/net/udpgro_bench.sh | 5 +++-- tools/testing/selftests/net/udpgro_frglist.sh | 5 +++-- 4 files changed, 34 insertions(+), 11 deletions(-) create mode 100755 tools/testing/selftests/net/net_helper.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh new file mode 100755 index 000000000000..4fe0befa13fb --- /dev/null +++ b/tools/testing/selftests/net/net_helper.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Helper functions + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local port_hex + local i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \ + grep -q "${port_hex}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 0c743752669a..af5dc57c8ce9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro functional tests. +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -51,8 +53,7 @@ run_one() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) @@ -97,7 +98,7 @@ run_one_nat() { echo "ok" || \ echo "failed"& - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? kill -INT $pid @@ -118,11 +119,9 @@ run_one_2sock() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 - sleep 0.1 - # first UDP GSO socket should be closed at this point + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 894972877e8b..cb664679b434 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -40,8 +42,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 0a6359bed0b9..dd47fa96f6b3 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -45,8 +47,7 @@ run_one() { echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } -- cgit v1.2.3 From e2e13630f93d942d02f3b3f98660228a3545c60e Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 7 Nov 2023 20:55:00 +0000 Subject: objtool: Fix calloc call for new -Walloc-size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 14 introduces a new -Walloc-size included in -Wextra which errors out like: ``` check.c: In function ‘cfi_alloc’: check.c:294:33: error: allocation of insufficient size ‘1’ for type ‘struct cfi_state’ with size ‘320’ [-Werror=alloc-size] 294 | struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); | ^~~~~~ ``` The calloc prototype is: ``` void *calloc(size_t nmemb, size_t size); ``` So, just swap the number of members and size arguments to match the prototype, as we're initialising 1 struct of size `sizeof(struct ...)`. GCC then sees we're not doing anything wrong. Signed-off-by: Sam James Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20231107205504.1470006-1-sam@gentoo.org --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e94756e09ca9..548ec3cd7c00 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -291,7 +291,7 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state, static struct cfi_state *cfi_alloc(void) { - struct cfi_state *cfi = calloc(sizeof(struct cfi_state), 1); + struct cfi_state *cfi = calloc(1, sizeof(struct cfi_state)); if (!cfi) { WARN("calloc failed"); exit(1); -- cgit v1.2.3 From ff8867af01daa7ea770bebf5f91199b7434b74e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 09:14:04 -0800 Subject: bpf: rename BPF_F_TEST_SANITY_STRICT to BPF_F_TEST_REG_INVARIANTS Rename verifier internal flag BPF_F_TEST_SANITY_STRICT to more neutral BPF_F_TEST_REG_INVARIANTS. This is a follow up to [0]. A few selftests and veristat need to be adjusted in the same patch as well. [0] https://patchwork.kernel.org/project/netdevbpf/patch/20231112010609.848406-5-andrii@kernel.org/ Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231117171404.225508-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- include/uapi/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 6 +++--- tools/include/uapi/linux/bpf.h | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 2 +- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 2 +- tools/testing/selftests/bpf/progs/verifier_bounds.c | 4 ++-- tools/testing/selftests/bpf/test_loader.c | 6 +++--- tools/testing/selftests/bpf/test_sock_addr.c | 3 +-- tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/testing_helpers.c | 4 ++-- tools/testing/selftests/bpf/veristat.c | 12 ++++++------ 13 files changed, 24 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 402b6bc44a1b..52a4012b8255 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -602,7 +602,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ - bool test_sanity_strict; /* fail verification on sanity violations */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8a5855fcee69..7a5498242eaa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1201,7 +1201,7 @@ enum bpf_perf_event_type { #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) /* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_SANITY_STRICT (1U << 7) +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f266e03ba342..5e43ddd1b83f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2574,7 +2574,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | BPF_F_XDP_DEV_BOUND_ONLY | - BPF_F_TEST_SANITY_STRICT)) + BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 59505881e7a7..7c3461b89513 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2608,14 +2608,14 @@ static int reg_bounds_sanity_check(struct bpf_verifier_env *env, return 0; out: - verbose(env, "REG SANITY VIOLATION (%s): %s u64=[%#llx, %#llx] " + verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", ctx, msg, reg->umin_value, reg->umax_value, reg->smin_value, reg->smax_value, reg->u32_min_value, reg->u32_max_value, reg->s32_min_value, reg->s32_max_value, reg->var_off.value, reg->var_off.mask); - if (env->test_sanity_strict) + if (env->test_reg_invariants) return -EFAULT; __mark_reg_unbounded(reg); return 0; @@ -20791,7 +20791,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; - env->test_sanity_strict = attr->prog_flags & BPF_F_TEST_SANITY_STRICT; + env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8a5855fcee69..7a5498242eaa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1201,7 +1201,7 @@ enum bpf_perf_event_type { #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) /* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_SANITY_STRICT (1U << 7) +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 3f2d70831873..e770912fc1d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index fe0cb906644b..7a8b0bf0a7f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -838,7 +838,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, - .prog_flags = BPF_F_TEST_SANITY_STRICT, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0c1460936373..ec430b71730b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,7 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1047,7 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 57e27b1a73a6..a350ecdfba4a 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -179,7 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); - spec->prog_flags = BPF_F_TEST_SANITY_STRICT; /* by default be strict */ + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -280,8 +280,8 @@ static int parse_test_spec(struct test_loader *tester, update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); - } else if (strcmp(val, "BPF_F_TEST_SANITY_STRICT") == 0) { - update_flags(&spec->prog_flags, BPF_F_TEST_SANITY_STRICT, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { err = parse_int(val, &flags, "test prog flags"); if (err) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 878c077e0fa7..b0068a9d2cfe 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,8 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - bpf_program__set_flags(prog, BPF_F_TEST_SANITY_STRICT); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4992022f3137..f36e41435be7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 9786a94a666c..d2458c1b1671 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 609fd9753af0..1d418d66e375 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -145,7 +145,7 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; - bool strict_range_sanity; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -225,8 +225,8 @@ static const struct argp_option opts[] = { { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, { "test-states", 't', NULL, 0, "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, - { "test-sanity", 'r', NULL, 0, - "Force strict BPF verifier register sanity behavior (BPF_F_TEST_SANITY_STRICT program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -299,7 +299,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.force_checkpoints = true; break; case 'r': - env.strict_range_sanity = true; + env.force_reg_invariants = true; break; case 'n': errno = 0; @@ -1028,8 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); - if (env.strict_range_sanity) - bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_SANITY_STRICT); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; -- cgit v1.2.3 From af51d6bd0b130b06fc58b35fee8f93b0a5c77f21 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Nov 2023 13:17:23 +0100 Subject: selftests: mlxsw: Add PCI reset test Test that PCI reset works correctly by verifying that only the expected reset methods are supported and that after issuing the reset the ifindex of the port changes. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/pci_reset.sh | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh new file mode 100755 index 000000000000..fe0343b95e6c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that PCI reset works correctly by verifying that only the expected reset +# methods are supported and that after issuing the reset the ifindex of the +# port changes. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + pci_reset_test +" +NUM_NETIFS=1 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +pci_reset_test() +{ + RET=0 + + local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1) + local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2) + + if [ $bus != "pci" ]; then + check_err 1 "devlink device is not a PCI device" + log_test "pci reset" + return + fi + + if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then + check_err 1 "reset is not supported" + log_test "pci reset" + return + fi + + [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]] + check_err $? "only \"bus\" reset method should be supported" + + local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + echo 1 > /sys/bus/pci/devices/$bdf/reset + check_err $? "reset failed" + + # Wait for udev to rename newly created netdev. + udevadm settle + + local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + [[ $ifindex_pre != $ifindex_post ]] + check_err $? "reset not performed" + + log_test "pci reset" +} + +swp1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3 From 0c95c9fdb696f35c7864785ba84cb9a50152daff Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:20 -0800 Subject: bpf: emit map name in register state if applicable and available In complicated real-world applications, whenever debugging some verification error through verifier log, it often would be very useful to see map name for PTR_TO_MAP_VALUE register. Usually this needs to be inferred from key/value sizes and maybe trying to guess C code location, but it's not always clear. Given verifier has the name, and it's never too long, let's just emit it for ptr_to_map_key, ptr_to_map_value, and const_ptr_to_map registers. We reshuffle the order a bit, so that map name, key size, and value size appear before offset and immediate values, which seems like a more logical order. Current output: R1_w=map_ptr(map=array_map,ks=4,vs=8,off=0,imm=0) But we'll get rid of useless off=0 and imm=0 parts in the next patch. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 24 ++++++++++++++++------ tools/testing/selftests/bpf/prog_tests/spin_lock.c | 10 ++++----- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 97a1641e848e..c209ab1ec2b5 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -553,6 +553,17 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, } } +static bool type_is_map_ptr(enum bpf_reg_type t) { + switch (base_type(t)) { + case CONST_PTR_TO_MAP: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + return true; + default: + return false; + } +} + static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { enum bpf_reg_type t; @@ -584,16 +595,17 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (type_is_non_owning_ref(reg->type)) verbose_a("%s", "non_own_ref"); + if (type_is_map_ptr(t)) { + if (reg->map_ptr->name[0]) + verbose_a("map=%s", reg->map_ptr->name); + verbose_a("ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); + } if (t != SCALAR_VALUE) verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose_a("r=%d", reg->range); - else if (base_type(t) == CONST_PTR_TO_MAP || - base_type(t) == PTR_TO_MAP_KEY || - base_type(t) == PTR_TO_MAP_VALUE) - verbose_a("ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); if (tnum_is_const(reg->var_off)) { /* Typically an immediate SCALAR_VALUE, but * could be a pointer whose offset is too big diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93beb..ace65224286f 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -17,18 +17,18 @@ static struct { "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4,off=0,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8,off=0,imm=0)" + " R1_w=map_value(id=2,ks=4,vs=8,off=0,imm=0)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, -- cgit v1.2.3 From 1db747d75b1dbe17bf4283ed87bd3b7a92010f34 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:21 -0800 Subject: bpf: omit default off=0 and imm=0 in register state log Simplify BPF verifier log further by omitting default (and frequently irrelevant) off=0 and imm=0 parts for non-SCALAR_VALUE registers. As can be seen from fixed tests, this is often a visual noise for PTR_TO_CTX register and even for PTR_TO_PACKET registers. Omitting default values follows the rest of register state logic: we omit default values to keep verifier log succinct and to highlight interesting state that deviates from default one. E.g., we do the same for var_off, when it's unknown, which gives no additional information. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 10 +++--- tools/testing/selftests/bpf/prog_tests/align.c | 42 +++++++++++----------- tools/testing/selftests/bpf/prog_tests/log_buf.c | 4 +-- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 14 ++++---- .../selftests/bpf/progs/exceptions_assert.c | 10 +++--- 5 files changed, 39 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index c209ab1ec2b5..20b4f81087da 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -602,16 +602,14 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s reg->map_ptr->key_size, reg->map_ptr->value_size); } - if (t != SCALAR_VALUE) + if (t != SCALAR_VALUE && reg->off) verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose_a("r=%d", reg->range); if (tnum_is_const(reg->var_off)) { - /* Typically an immediate SCALAR_VALUE, but - * could be a pointer whose offset is too big - * for reg->off - */ - verbose_a("imm=%llx", reg->var_off.value); + /* a pointer register with fixed offset */ + if (reg->var_off.value) + verbose_a("imm=%llx", reg->var_off.value); } else { print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d3c..4ebd0da898f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, "R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", "var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. @@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65ef4..0f7ea4d7d9f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index ace65224286f..18d451be57c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(map=.data.A,ks=4,vs=4,off=0,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)" - " R1_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,ks=4,vs=8,off=0,imm=0)" - " R1_w=map_value(id=2,ks=4,vs=8,off=0,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a11..26f7d67432cc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -59,7 +59,7 @@ check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; -- cgit v1.2.3 From 0f8dbdbc641b45a5fa31d497f9fc83ffe1174fa3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:22 -0800 Subject: bpf: smarter verifier log number printing logic Instead of always printing numbers as either decimals (and in some cases, like for "imm=%llx", in hexadecimals), decide the form based on actual values. For numbers in a reasonably small range (currently, [0, U16_MAX] for unsigned values, and [S16_MIN, S16_MAX] for signed ones), emit them as decimals. In all other cases, even for signed values, emit them in hexadecimals. For large values hex form is often times way more useful: it's easier to see an exact difference between 0xffffffff80000000 and 0xffffffff7fffffff, than between 18446744071562067966 and 18446744071562067967, as one particular example. Small values representing small pointer offsets or application constants, on the other hand, are way more useful to be represented in decimal notation. Adjust reg_bounds register state parsing logic to take into account this change. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 79 +++++++++++++++++++--- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 53 +++++++++------ .../selftests/bpf/progs/exceptions_assert.c | 32 ++++----- 3 files changed, 118 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 20b4f81087da..87105aa482ed 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -509,10 +509,52 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool is_unum_decimal(u64 num) +{ + return num <= UNUM_MAX_DECIMAL; +} + +static bool is_snum_decimal(s64 num) +{ + return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL; +} + +static void verbose_unum(struct bpf_verifier_env *env, u64 num) +{ + if (is_unum_decimal(num)) + verbose(env, "%llu", num); + else + verbose(env, "%#llx", num); +} + +static void verbose_snum(struct bpf_verifier_env *env, s64 num) +{ + if (is_snum_decimal(num)) + verbose(env, "%lld", num); + else + verbose(env, "%#llx", num); +} + static void print_scalar_ranges(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, const char **sep) { + /* For signed ranges, we want to unify 64-bit and 32-bit values in the + * output as much as possible, but there is a bit of a complication. + * If we choose to print values as decimals, this is natural to do, + * because negative 64-bit and 32-bit values >= -S32_MIN have the same + * representation due to sign extension. But if we choose to print + * them in hex format (see is_snum_decimal()), then sign extension is + * misleading. + * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in + * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two + * very different numbers. + * So we avoid sign extension if we choose to print values in hex. + */ struct { const char *name; u64 val; @@ -522,8 +564,14 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, {"smax", reg->smax_value, reg->smax_value == S64_MAX}, {"umin", reg->umin_value, reg->umin_value == 0}, {"umax", reg->umax_value, reg->umax_value == U64_MAX}, - {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, - {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"smin32", + is_snum_decimal((s64)reg->s32_min_value) + ? (s64)reg->s32_min_value + : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", + is_snum_decimal((s64)reg->s32_max_value) + ? (s64)reg->s32_max_value + : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX}, {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; @@ -549,7 +597,10 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, verbose(env, "%s=", m2->name); } - verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); + if (m1->name[0] == 's') + verbose_snum(env, m1->val); + else + verbose_unum(env, m1->val); } } @@ -576,14 +627,14 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - verbose(env, "%lld", reg->var_off.value + reg->off); + verbose_snum(env, reg->var_off.value + reg->off); return; } /* * _a stands for append, was shortened to avoid multiline statements below. * This macro is used to output a comma separated list of attributes. */ -#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; }) +#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; }) verbose(env, "%s", reg_type_str(env, t)); if (base_type(t) == PTR_TO_BTF_ID) @@ -602,14 +653,20 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s reg->map_ptr->key_size, reg->map_ptr->value_size); } - if (t != SCALAR_VALUE && reg->off) - verbose_a("off=%d", reg->off); - if (type_is_pkt_pointer(t)) - verbose_a("r=%d", reg->range); + if (t != SCALAR_VALUE && reg->off) { + verbose_a("off="); + verbose_snum(env, reg->off); + } + if (type_is_pkt_pointer(t)) { + verbose_a("r="); + verbose_unum(env, reg->range); + } if (tnum_is_const(reg->var_off)) { /* a pointer register with fixed offset */ - if (reg->var_off.value) - verbose_a("imm=%llx", reg->var_off.value); + if (reg->var_off.value) { + verbose_a("imm="); + verbose_snum(env, reg->var_off.value); + } } else { print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 7a8b0bf0a7f8..fd4ab23e6f54 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -13,10 +13,13 @@ */ #define U64_MAX ((u64)UINT64_MAX) #define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) #define S64_MIN ((s64)INT64_MIN) #define S64_MAX ((s64)INT64_MAX) #define S32_MIN ((s32)INT_MIN) #define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) typedef unsigned long long ___u64; typedef unsigned int ___u32; @@ -138,13 +141,17 @@ static enum num_t t_unsigned(enum num_t t) } } +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + static bool num_is_small(enum num_t t, u64 x) { switch (t) { - case U64: return (u64)x <= 256; - case U32: return (u32)x <= 256; - case S64: return (s64)x >= -256 && (s64)x <= 256; - case S32: return (s32)x >= -256 && (s32)x <= 256; + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; default: printf("num_is_small!\n"); exit(1); } } @@ -1023,20 +1030,19 @@ static int parse_reg_state(const char *s, struct reg_state *reg) */ struct { const char *pfx; - const char *fmt; u64 *dst, def; bool is_32, is_set; } *f, fields[8] = { - {"smin=", "%lld", ®->r[S64].a, S64_MIN}, - {"smax=", "%lld", ®->r[S64].b, S64_MAX}, - {"umin=", "%llu", ®->r[U64].a, 0}, - {"umax=", "%llu", ®->r[U64].b, U64_MAX}, - {"smin32=", "%lld", ®->r[S32].a, (u32)S32_MIN, true}, - {"smax32=", "%lld", ®->r[S32].b, (u32)S32_MAX, true}, - {"umin32=", "%llu", ®->r[U32].a, 0, true}, - {"umax32=", "%llu", ®->r[U32].b, U32_MAX, true}, + {"smin=", ®->r[S64].a, S64_MIN}, + {"smax=", ®->r[S64].b, S64_MAX}, + {"umin=", ®->r[U64].a, 0}, + {"umax=", ®->r[U64].b, U64_MAX}, + {"smin32=", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", ®->r[U32].a, 0, true}, + {"umax32=", ®->r[U32].b, U32_MAX, true}, }; - const char *p, *fmt; + const char *p; int i; p = strchr(s, '='); @@ -1050,8 +1056,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg) long long sval; enum num_t t; - if (sscanf(p, "%lld", &sval) != 1) - return -EINVAL; + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } reg->valid = true; for (t = first_t; t <= last_t; t++) { @@ -1075,9 +1086,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg) if (mcnt) { /* populate all matched fields */ - fmt = fields[midxs[0]].fmt; - if (sscanf(p, fmt, &val) != 1) - return -EINVAL; + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } for (i = 0; i < mcnt; i++) { f = &fields[midxs[i]]; diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 26f7d67432cc..49efaed143fc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,48 +18,48 @@ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") +__msg(": R0_w=0xffffffff80000000 R10=fp0") check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") +__msg(": R0_w=0x7fffffff R10=fp0") check_assert(s64, eq, int_max, INT_MAX); __msg(": R0_w=0 R10=fp0") check_assert(s64, eq, zero, 0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") check_assert(s64, eq, llong_max, LLONG_MAX); -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx() R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; -- cgit v1.2.3 From 54293e4d6a629befb0b0d93aa416a658678f7f01 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 11:18:56 -0300 Subject: selftests/tc-testing: add hashtable tests for u32 Add tests to specifically check for the refcount interactions of hashtables created by u32. These tables should not be deleted when referenced and the flush order should respect a tree like composition. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231114141856.974326-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/tc-tests/filters/u32.json | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index ddc7c355be0a..24bd0c2a3014 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -272,5 +272,62 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "bd32", + "name": "Try to delete hashtable referenced by another u32 filter", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "4585", + "name": "Delete small tree of u32 hashtables and filters", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] -- cgit v1.2.3 From 57b97ecb40caeb116c22451bbdaaa9a1d12c0b43 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 20 Nov 2023 10:04:52 -0800 Subject: selftests/bpf: reduce verboseness of reg_bounds selftest logs Reduce verboseness of test_progs' output in reg_bounds set of tests with two changes. First, instead of each different operator (<, <=, >, ...) being it's own subtest, combine all different ops for the same (x, y, init_t, cond_t) values into single subtest. Instead of getting 6 subtests, we get one generic one, e.g.: #192/53 reg_bounds_crafted/(s64)[0xffffffffffffffff; 0] (s64) 0xffffffff00000000:OK Second, for random generated test cases, treat all of them as a single test to eliminate very verbose output with random values in them. So now we'll just get one line per each combination of (init_t, cond_t), instead of 6 x 25 = 150 subtests before this change: #225 reg_bounds_rand_consts_s32_s32:OK Given we reduce verboseness so much, it makes sense to do a bit more random testing, so we also bump default number of random tests to 100, up from 25. This doesn't increase runtime significantly, especially in parallelized mode. With all the above changes we still make sure that we have all the information necessary for reproducing test case if it happens to fail. That includes reporting random seed and specific operator that is failing. Those will only be printed to console if related test/subtest fails, so it doesn't have any added verboseness implications. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231120180452.145849-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 32 ++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index fd4ab23e6f54..0c9abd279e18 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -1361,11 +1361,11 @@ struct subtest_case { enum op op; }; -static void subtest_case_str(struct strbuf *sb, struct subtest_case *t) +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) { snappendf(sb, "(%s)", t_str(t->init_t)); snprintf_range(t->init_t, sb, t->x); - snappendf(sb, " (%s)%s ", t_str(t->cond_t), op_str(t->op)); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? op_str(t->op) : ""); snprintf_range(t->init_t, sb, t->y); } @@ -1440,8 +1440,8 @@ static int verify_case_op(enum num_t init_t, enum num_t cond_t, /* Given setup ranges and number types, go over all supported operations, * generating individual subtest for each allowed combination */ -static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, - struct range x, struct range y) +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) { DEFINE_STRBUF(sb, 256); int err; @@ -1452,11 +1452,14 @@ static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, .y = y, }; + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + for (sub.op = first_op; sub.op <= last_op; sub.op++) { sb->pos = 0; /* reset position in strbuf */ - subtest_case_str(sb, &sub); - if (!test__start_subtest(sb->buf)) - continue; + subtest_case_str(sb, &sub, true /* print op */); if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ printf("TEST CASE: %s\n", sb->buf); @@ -1491,6 +1494,12 @@ static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, return 0; } +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + /* ================================ * GENERATED CASES FROM SEED VALUES * ================================ @@ -1913,7 +1922,7 @@ void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } -#define DEFAULT_RAND_CASE_CNT 25 +#define DEFAULT_RAND_CASE_CNT 100 #define RAND_21BIT_MASK ((1 << 22) - 1) @@ -1968,7 +1977,6 @@ static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool cons "[RANDOM SEED %u] RANGE x %s, %s -> %s", ctx.rand_seed, const_range ? "CONST" : "RANGE", t_str(init_t), t_str(cond_t)); - fprintf(env.stdout, "%s\n", ctx.progress_ctx); for (i = 0; i < ctx.rand_case_cnt; i++) { range1 = rand_range(init_t); @@ -1980,14 +1988,16 @@ static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool cons } /* x */ - if (verify_case(&ctx, init_t, cond_t, range1, range2)) + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) goto cleanup; /* x */ - if (verify_case(&ctx, init_t, cond_t, range2, range1)) + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) goto cleanup; } cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); cleanup_ctx(&ctx); } -- cgit v1.2.3 From 98594181944daa201481ad63242806beb7c89ff4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Nov 2023 16:52:15 +0000 Subject: iommufd/selftest: Fix _test_mock_dirty_bitmaps() The ASSERT_EQ() macro sneakily expands to two statements, so the loop here needs braces to ensure it captures both and actually terminates the test upon failure. Where these tests are currently failing on my arm64 machine, this reduces the number of logged lines from a rather unreasonable ~197,000 down to 10. While we're at it, we can also clean up the tautologous "count" calculations whose assertions can never fail unless mathematics and/or the C language become fundamentally broken. Fixes: a9af47e382a4 ("iommufd/selftest: Test IOMMU_HWPT_GET_DIRTY_BITMAP") Link: https://lore.kernel.org/r/90e083045243ef407dd592bb1deec89cd1f4ddf2.1700153535.git.robin.murphy@arm.com Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Joao Martins Tested-by: Joao Martins Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_utils.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 050e9751321c..ad9202335656 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -293,15 +293,13 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; unsigned long nr = nbits / 2; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) - if (!(i % 2)) - set_bit(i, (unsigned long *)bitmap); - ASSERT_EQ(nr, count); + for (i = 0; i < nbits; i += 2) + set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, bitmap, &out_dirty); @@ -311,9 +309,10 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); - for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ + for (i = 0; i < nbits; i++) { ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); - ASSERT_EQ(count, out_dirty); + } memset(bitmap, 0, bitmap_size); test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, -- cgit v1.2.3 From a0bc96c0cd6e61fcaebff34432791a4b5118fc68 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 16 Nov 2023 15:34:43 -0500 Subject: selftests: net: verify fq per-band packet limit Commit 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") introduces multiple traffic bands, and per-band maximum packet count. Per-band limits ensures that packets in one class cannot fill the entire qdisc and so cause DoS to the traffic in the other classes. Verify this behavior: 1. set the limit to 10 per band 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped Packets must remain queued for a period to trigger this behavior. Use SO_TXTIME to store packets for 100 msec. The test reuses existing upstream test infra. The script is a fork of cmsg_time.sh. The scripts call cmsg_sender. The test extends cmsg_sender with two arguments: * '-P' SO_PRIORITY There is a subtle difference between IPv4 and IPv6 stack behavior: PF_INET/IP_TOS sets IP header bits and sk_priority PF_INET6/IPV6_TCLASS sets IP header bits BUT NOT sk_priority * '-n' num pkts Send multiple packets in quick succession. I first attempted a for loop in the script, but this is too slow in virtualized environments, causing flakiness as the 100ms timeout is reached and packets are dequeued. Also do not wait for timestamps to be queued unless timestamps are requested. Signed-off-by: Willem de Bruijn Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231116203449.2627525-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/cmsg_sender.c | 50 ++++++++++++++-------- tools/testing/selftests/net/fq_band_pktlimit.sh | 57 +++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 17 deletions(-) create mode 100755 tools/testing/selftests/net/fq_band_pktlimit.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b2aca4c5f10..9274edfb76ff 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += fq_band_pktlimit.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..8d7575389f58 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -45,11 +45,13 @@ struct options { const char *host; const char *service; unsigned int size; + unsigned int num_pkt; struct { unsigned int mark; unsigned int dontfrag; unsigned int tclass; unsigned int hlimit; + unsigned int priority; } sockopt; struct { unsigned int family; @@ -72,6 +74,7 @@ struct options { } v6; } opt = { .size = 13, + .num_pkt = 1, .sock = { .family = AF_UNSPEC, .type = SOCK_DGRAM, @@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[]) cs_usage(argv[0]); } break; - + case 'P': + opt.sockopt.priority = atoi(optarg); + break; case 'm': opt.mark.ena = true; opt.mark.val = atoi(optarg); @@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[]) case 'M': opt.sockopt.mark = atoi(optarg); break; + case 'n': + opt.num_pkt = atoi(optarg); + break; case 'd': opt.txtime.ena = true; opt.txtime.delay = atoi(optarg); @@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + if (opt.sockopt.priority && + setsockopt(fd, SOL_SOCKET, SO_PRIORITY, + &opt.sockopt.priority, sizeof(opt.sockopt.priority))) + error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); } int main(int argc, char *argv[]) @@ -421,6 +433,7 @@ int main(int argc, char *argv[]) char *buf; int err; int fd; + int i; cs_parse_args(argc, argv); @@ -480,24 +493,27 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); - err = sendmsg(fd, &msg, 0); - if (err < 0) { - if (!opt.silent_send) - fprintf(stderr, "send failed: %s\n", strerror(errno)); - err = ERN_SEND; - goto err_out; - } else if (err != (int)opt.size) { - fprintf(stderr, "short send\n"); - err = ERN_SEND_SHORT; - goto err_out; - } else { - err = ERN_SUCCESS; + for (i = 0; i < opt.num_pkt; i++) { + err = sendmsg(fd, &msg, 0); + if (err < 0) { + if (!opt.silent_send) + fprintf(stderr, "send failed: %s\n", strerror(errno)); + err = ERN_SEND; + goto err_out; + } else if (err != (int)opt.size) { + fprintf(stderr, "short send\n"); + err = ERN_SEND_SHORT; + goto err_out; + } } + err = ERN_SUCCESS; - /* Make sure all timestamps have time to loop back */ - usleep(opt.txtime.delay); + if (opt.ts.ena) { + /* Make sure all timestamps have time to loop back */ + usleep(opt.txtime.delay); - cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + } err_out: close(fd); diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh new file mode 100755 index 000000000000..24b77bdf41ff --- /dev/null +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that FQ has a packet limit per band: +# +# 1. set the limit to 10 per band +# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped +# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped +# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped +# +# Send packets with a 100ms delay to ensure that previously sent +# packets are still queued when later ones are sent. +# Use SO_TXTIME for this. + +die() { + echo "$1" + exit 1 +} + +# run inside private netns +if [[ $# -eq 0 ]]; then + ./in_netns.sh "$0" __subprocess + exit +fi + +ip link add type dummy +ip link set dev dummy0 up +ip -6 addr add fdaa::1/128 dev dummy0 +ip -6 route add fdaa::/64 dev dummy0 +tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Initial stats will report zero sent, as all packets are still +# queued in FQ. Sleep for the delay period (100ms) and see that +# twenty are now sent. +sleep 0.1 +OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Log the output after the test +echo "${OUT1}" +echo "${OUT2}" +echo "${OUT3}" +echo "${OUT4}" + +# Test the output for expected values +echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" -- cgit v1.2.3 From 025de7b6a6dd44e78d0d45b4f3b4f104ed54b0fd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:03 -0300 Subject: selftests: tc-testing: cap parallel tdc to 4 cores We have observed a lot of lock contention and test instability when running with >8 cores. Enough to actually make the tests run slower than with fewer cores. Cap the maximum cores of parallel tdc to 4 which showed in testing to be a reasonable number for efficiency and stability in different kernel config scenarios. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a6718192aff3..f764b43f112b 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -1017,6 +1017,7 @@ def main(): parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() args.NAMES = NAMES + args.mp = min(args.mp, 4) pm.set_args(args) check_default_settings(args, remaining, pm) if args.verbose > 2: -- cgit v1.2.3 From 50a5988a7a540fb1ad4e620e1bbf11cc646e3dc7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:04 -0300 Subject: selftests: tc-testing: move back to per test ns setup Surprisingly in kernel configs with most of the debug knobs turned on, pre-allocating the test resources makes tdc run much slower overall than when allocating resources on a per test basis. As these knobs are used in kselftests in downstream CIs, let's go back to the old way of doing things to avoid kselftests timeouts. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202311161129.3b45ed53-oliver.sang@intel.com Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 68 ++++++++-------------- 1 file changed, 25 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 62974bd3a4a5..2b8cbfdf1083 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -17,44 +17,6 @@ except ImportError: netlink = False print("!!! Consider installing pyroute2 !!!") -def prepare_suite(obj, test): - original = obj.args.NAMES - - if 'skip' in test and test['skip'] == 'yes': - return - - if 'nsPlugin' not in test['plugins']: - return - - shadow = {} - shadow['IP'] = original['IP'] - shadow['TC'] = original['TC'] - shadow['NS'] = '{}-{}'.format(original['NS'], test['random']) - shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id']) - shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id']) - shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id']) - shadow['DEV2'] = original['DEV2'] - obj.args.NAMES = shadow - - if netlink == True: - obj._nl_ns_create() - else: - obj._ns_create() - - # Make sure the netns is visible in the fs - while True: - obj._proc_check() - try: - ns = obj.args.NAMES['NS'] - f = open('/run/netns/{}'.format(ns)) - f.close() - break - except: - time.sleep(0.1) - continue - - obj.args.NAMES = original - class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'ns/SubPlugin' @@ -65,19 +27,39 @@ class SubPlugin(TdcPlugin): super().pre_suite(testcount, testlist) - print("Setting up namespaces and devices...") + def prepare_test(self, test): + if 'skip' in test and test['skip'] == 'yes': + return - with Pool(self.args.mp) as p: - it = zip(cycle([self]), testlist) - p.starmap(prepare_suite, it) + if 'nsPlugin' not in test['plugins']: + return - def pre_case(self, caseinfo, test_skip): + if netlink == True: + self._nl_ns_create() + else: + self._ns_create() + + # Make sure the netns is visible in the fs + while True: + self._proc_check() + try: + ns = self.args.NAMES['NS'] + f = open('/run/netns/{}'.format(ns)) + f.close() + break + except: + time.sleep(0.1) + continue + + def pre_case(self, test, test_skip): if self.args.verbose: print('{}.pre_case'.format(self.sub_class)) if test_skip: return + self.prepare_test(test) + def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) -- cgit v1.2.3 From 3d5026fc5adbc796a0547fcef19d997786e0bb31 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:05 -0300 Subject: selftests: tc-testing: use netns delete from pyroute2 When pyroute2 is available, use the native netns delete routine instead of calling iproute2 to do it. As forks are expensive with some kernel configs, minimize its usage to avoid kselftests timeouts. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 2b8cbfdf1083..920dcbedc395 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -64,7 +64,10 @@ class SubPlugin(TdcPlugin): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - self._ns_destroy() + if netlink == True: + self._nl_ns_destroy() + else: + self._ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -174,6 +177,10 @@ class SubPlugin(TdcPlugin): ''' self._exec_cmd_batched('pre', self._ns_create_cmds()) + def _nl_ns_destroy(self): + ns = self.args.NAMES['NS'] + netns.remove(ns) + def _ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) -- cgit v1.2.3 From 3f2d94a4ff489ebbc6b66cc33f9775cb33c00533 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:06 -0300 Subject: selftests: tc-testing: leverage -all in suite ns teardown Instead of listing lingering ns pinned files and delete them one by one, leverage '-all' from iproute2 to do it in a single process fork. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 920dcbedc395..7b674befceec 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -74,13 +74,12 @@ class SubPlugin(TdcPlugin): print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - for f in os.listdir('/run/netns/'): - cmd = self._replace_keywords("$IP netns del {}".format(f)) + cmd = "$IP -a netns del" - if self.args.verbose > 3: - print('_exec_cmd: command "{}"'.format(cmd)) + if self.args.verbose > 3: + print('_exec_cmd: command "{}"'.format(cmd)) - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) def adjust_command(self, stage, command): super().adjust_command(stage, command) -- cgit v1.2.3 From 4b480cfb1066a8394017697ff4a58a970641e9b7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:07 -0300 Subject: selftests: tc-testing: timeout on unbounded loops In the spirit of failing early, timeout on unbounded loops that take longer than 20 ticks to complete. Such loops are to ensure that objects created are already visible so tests can proceed without any issues. If a test setup takes more than 20 ticks to see an object, there's definetely something wrong. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 7b674befceec..65c8f3f983b9 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -40,7 +40,10 @@ class SubPlugin(TdcPlugin): self._ns_create() # Make sure the netns is visible in the fs + ticks = 20 while True: + if ticks == 0: + raise TimeoutError self._proc_check() try: ns = self.args.NAMES['NS'] @@ -49,6 +52,7 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue def pre_case(self, test, test_skip): @@ -127,7 +131,10 @@ class SubPlugin(TdcPlugin): with IPRoute() as ip: ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) ip.link('add', ifname=dummy, kind='dummy') + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev1_idx = ip.link_lookup(ifname=dev1)[0] dummy_idx = ip.link_lookup(ifname=dummy)[0] @@ -136,17 +143,22 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue netns.popns() with IPRoute() as ip: + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev0_idx = ip.link_lookup(ifname=dev0)[0] ip.link('set', index=dev0_idx, state='up') break except: time.sleep(0.1) + ticks -= 1 continue def _ns_create_cmds(self): -- cgit v1.2.3 From 4968afa0143dbff8a84b5ce3c302dd7d0bdcfa48 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:08 -0300 Subject: selftests: tc-testing: report number of workers in use Report the number of workers in use to process the test batches. Since the number is now subject to a limit, avoid users getting confused. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-7-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index f764b43f112b..669ec89ebfe1 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -616,7 +616,7 @@ def test_runner_mp(pm, args, alltests): batches.insert(0, serial) print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial))) - print("Using {} batches".format(len(batches))) + print("Using {} batches and {} workers".format(len(batches), args.mp)) # We can't pickle these objects so workaround them global mp_pm -- cgit v1.2.3 From e9e60c82fe391d04db55a91c733df4a017c28b2f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 21 Nov 2023 11:24:08 -0500 Subject: selftests/kvm: fix compilation on non-x86_64 platforms MEM_REGION_SLOT and MEM_REGION_GPA are not really needed in test_invalid_memory_region_flags; the VM never runs and there are no other slots, so it is okay to use slot 0 and place it at address zero. This fixes compilation on architectures that do not define them. Fixes: 5d74316466f4 ("KVM: selftests: Add a memory region subtest to validate invalid flags") Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/guest_memfd_test.c | 2 -- tools/testing/selftests/kvm/set_memory_region_test.c | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 318ba6ba8bd3..c78a98c1a915 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -137,8 +137,6 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm) } for (flag = 0; flag; flag <<= 1) { - uint64_t bit; - fd = __vm_create_guest_memfd(vm, page_size, flag); TEST_ASSERT(fd == -1 && errno == EINVAL, "guest_memfd() with flag '0x%lx' should fail with EINVAL", diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 1efee1cfcff0..6637a0845acf 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -349,8 +349,8 @@ static void test_invalid_memory_region_flags(void) if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i))) continue; - r = __vm_set_user_memory_region(vm, MEM_REGION_SLOT, BIT(i), - MEM_REGION_GPA, MEM_REGION_SIZE, NULL); + r = __vm_set_user_memory_region(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL); TEST_ASSERT(r && errno == EINVAL, "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i)); @@ -358,16 +358,16 @@ static void test_invalid_memory_region_flags(void) if (supported_flags & BIT(i)) continue; - r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, BIT(i), - MEM_REGION_GPA, MEM_REGION_SIZE, NULL, 0, 0); + r = __vm_set_user_memory_region2(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL, 0, 0); TEST_ASSERT(r && errno == EINVAL, "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i)); } if (supported_flags & KVM_MEM_GUEST_MEMFD) { - r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, + r = __vm_set_user_memory_region2(vm, 0, KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD, - MEM_REGION_GPA, MEM_REGION_SIZE, NULL, 0, 0); + 0, MEM_REGION_SIZE, NULL, 0, 0); TEST_ASSERT(r && errno == EINVAL, "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported"); } -- cgit v1.2.3 From a6dda77a752d918b35ef4a3f94e6b8c7d7ba4a73 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 18 Nov 2023 02:48:56 +0000 Subject: perf kwork: Fix a build error on 32-bit lkft reported a build error for 32-bit system: builtin-kwork.c: In function 'top_print_work': builtin-kwork.c:1646:28: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'u64' {aka 'long long unsigned int'} [-Werror=format=] 1646 | ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id); | ~~~^ ~~~~~~~~ | | | | long int u64 {aka long long unsigned int} | %*lld cc1: all warnings being treated as errors make[3]: *** [/builds/linux/tools/build/Makefile.build:106: /home/tuxbuild/.cache/tuxmake/builds/1/build/builtin-kwork.o] Error 1 Fix it. Fixes: 55c40e505234 ("perf kwork top: Introduce new top utility") Reported-by: Linux Kernel Functional Testing Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: avagin@google.com Cc: daniel.diaz@linaro.org Link: https://lore.kernel.org/r/20231118024858.1567039-2-yangjihong1@huawei.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-kwork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index f007a9b27065..0092b9b39611 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -1643,7 +1643,7 @@ static int top_print_work(struct perf_kwork *kwork __maybe_unused, struct kwork_ /* * pid */ - ret += printf(" %*ld ", PRINT_PID_WIDTH, work->id); + ret += printf(" %*" PRIu64 " ", PRINT_PID_WIDTH, work->id); /* * tgid -- cgit v1.2.3 From 29b8e94dcf2575c17541f843741ee96691ff1ded Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 18 Nov 2023 02:48:57 +0000 Subject: perf lock contention: Fix a build error on 32-bit Fix a build error on 32-bit system: util/bpf_lock_contention.c: In function 'lock_contention_get_name': util/bpf_lock_contention.c:253:50: error: format '%lu' expects argument of type 'long unsigned int', but argument 4 has type 'u64 {aka long long unsigned int}' [-Werror=format=] snprintf(name_buf, sizeof(name_buf), "cgroup:%lu", cgrp_id); ~~^ %llu cc1: all warnings being treated as errors Fixes: d0c502e46e97 ("perf lock contention: Prepare to handle cgroups") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: avagin@google.com Cc: daniel.diaz@linaro.org Link: https://lore.kernel.org/r/20231118024858.1567039-3-yangjihong1@huawei.com Signed-off-by: Namhyung Kim --- tools/perf/util/bpf_lock_contention.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index e105245eb905..f1716c089c99 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "bpf_skel/lock_contention.skel.h" #include "bpf_skel/lock_data.h" @@ -250,7 +251,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (cgrp) return cgrp->name; - snprintf(name_buf, sizeof(name_buf), "cgroup:%lu", cgrp_id); + snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id); return name_buf; } -- cgit v1.2.3 From b0e2a0395312f4e53504ae84eeb5902e5518d1d7 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:35:39 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bpf_tcp_ca bpf_tcp_ca uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB6563F180C0F2BB4F6CFA5130E8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 48 ++++++++++------------ 1 file changed, 22 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525d4..a88e6e07e4f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); -- cgit v1.2.3 From 3ec1114a97457398077e45b231d502d1cc30439d Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:37:43 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bind_perm bind_perm uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB656314F467E075A106CA02BFE8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- tools/testing/selftests/bpf/prog_tests/bind_perm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298bb7..f7cd129cb82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); -- cgit v1.2.3 From f125d09b99fc0ee43f865810390f10b8f23a2c98 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:39:25 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bpf_obj_id bpf_obj_id uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB65639AA3A10B4BBAA79952C7E8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- .../testing/selftests/bpf/prog_tests/bpf_obj_id.c | 204 ++++++++------------- 1 file changed, 73 insertions(+), 131 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b15280..f09d6ac2ef09 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. */ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { -- cgit v1.2.3 From 3ece0e85f679c23d2a5128993846c58a2f5f890e Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:40:41 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in vmlinux vmlinux.c uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB6563ED1023A2A3AEF30BDA5DE8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- tools/testing/selftests/bpf/prog_tests/vmlinux.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc6474..6fb2217d940b 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); -- cgit v1.2.3 From 80b4ff1d2c9bc7e20b82d18535a27fa32dffa1dd Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 22 Nov 2023 11:02:44 -0500 Subject: selftests: remove the LSM_ID_IMA check in lsm/lsm_list_modules_test The IMA LSM ID token was removed as IMA isn't yet a proper LSM, but we forgot to remove the check from the selftest. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202311221047.a9Dww3vY-lkp@intel.com/ Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- tools/testing/selftests/lsm/lsm_list_modules_test.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c index 445c02f09c74..9df29b1e3497 100644 --- a/tools/testing/selftests/lsm/lsm_list_modules_test.c +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -101,9 +101,6 @@ TEST(correct_lsm_list_modules) case LSM_ID_TOMOYO: name = "tomoyo"; break; - case LSM_ID_IMA: - name = "ima"; - break; case LSM_ID_APPARMOR: name = "apparmor"; break; -- cgit v1.2.3 From 1041dfe6109fcb24e9a3d5d4ca9218e64dc0ed29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:36 -0800 Subject: tools headers UAPI: Update tools's copy of drm headers tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: David Airlie Cc: Daniel Vetter Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: dri-devel@lists.freedesktop.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-1-namhyung@kernel.org --- tools/include/uapi/drm/drm.h | 20 ++++++++++++++++++++ tools/include/uapi/drm/i915_drm.h | 8 ++++---- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 794c1d857677..de723566c5ae 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1134,6 +1134,26 @@ extern "C" { #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) #define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) #define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 7000e5910a1d..218edb0a96f8 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -38,13 +38,13 @@ extern "C" { */ /** - * DOC: uevents generated by i915 on it's device node + * DOC: uevents generated by i915 on its device node * * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch - * event from the gpu l3 cache. Additional information supplied is ROW, + * event from the GPU L3 cache. Additional information supplied is ROW, * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep - * track of these events and if a specific cache-line seems to have a - * persistent error remap it with the l3 remapping tool supplied in + * track of these events, and if a specific cache-line seems to have a + * persistent error, remap it with the L3 remapping tool supplied in * intel-gpu-tools. The value supplied with the event is always 1. * * I915_ERROR_UEVENT - Generated upon error detection, currently only via -- cgit v1.2.3 From 111844666672347747585e24d6cebda640bd1284 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:37 -0800 Subject: tools headers UAPI: Update tools's copy of fscrypt.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Eric Biggers Cc: "Theodore Y. Ts'o" Cc: Jaegeuk Kim Cc: linux-fscrypt@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-2-namhyung@kernel.org --- tools/include/uapi/linux/fscrypt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index fd1fb0d5389d..7a8f4c290187 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -71,7 +71,8 @@ struct fscrypt_policy_v2 { __u8 contents_encryption_mode; __u8 filenames_encryption_mode; __u8 flags; - __u8 __reserved[4]; + __u8 log2_data_unit_size; + __u8 __reserved[3]; __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]; }; -- cgit v1.2.3 From 5a9f95b67059dda160f65ab41c48587103b3b3a0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:38 -0800 Subject: tools headers UAPI: Update tools's copy of kvm.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-3-namhyung@kernel.org --- tools/include/uapi/linux/kvm.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f089ab290978..211b86de35ac 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -264,6 +264,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_SBI 35 #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 +#define KVM_EXIT_LOONGARCH_IOCSR 38 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -336,6 +337,13 @@ struct kvm_run { __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_LOONGARCH_IOCSR */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } iocsr_io; /* KVM_EXIT_HYPERCALL */ struct { __u64 nr; @@ -1192,6 +1200,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_COUNTER_OFFSET 227 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #ifdef KVM_CAP_IRQ_ROUTING @@ -1362,6 +1371,7 @@ struct kvm_dirty_tlb { #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_RISCV 0x8000000000000000ULL +#define KVM_REG_LOONGARCH 0x9000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL @@ -1418,9 +1428,16 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; -#define KVM_DEV_VFIO_GROUP 1 -#define KVM_DEV_VFIO_GROUP_ADD 1 -#define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_FILE 1 + +#define KVM_DEV_VFIO_FILE_ADD 1 +#define KVM_DEV_VFIO_FILE_DEL 2 + +/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */ +#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE + +#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD +#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL #define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 enum kvm_device_type { @@ -1555,6 +1572,7 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* Available with KVM_CAP_COUNTER_OFFSET */ #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) +#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) -- cgit v1.2.3 From fb3648a6a87b674540f0ba7701a353006c408065 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:39 -0800 Subject: tools headers UAPI: Update tools's copy of mount.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Alexander Viro Cc: Christian Brauner Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-4-namhyung@kernel.org --- tools/include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 8eb0d7b758d2..bb242fdcfe6b 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -100,8 +100,9 @@ enum fsconfig_command { FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ }; /* -- cgit v1.2.3 From daa97513415525e4d17c4978f3c47a7b7e935b36 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:40 -0800 Subject: tools headers UAPI: Update tools's copy of vhost.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux.dev Cc: netdev@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-5-namhyung@kernel.org --- tools/include/uapi/linux/vhost.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index f5c48b61ab62..649560c685f1 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -219,4 +219,12 @@ */ #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) +/* Get the group for the descriptor table including driver & device areas + * of a virtqueue: read index, write group in num. + * The virtqueue index is stored in the index field of vhost_vring_state. + * The group ID of the descriptor table for this specific virtqueue + * is returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) #endif -- cgit v1.2.3 From 91c97b36bd6939a5c3c29de9e83e6359146c6939 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:41 -0800 Subject: tools headers UAPI: Update tools's copy of unistd.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Arnd Bergmann Cc: linux-arch@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-6-namhyung@kernel.org --- tools/include/uapi/asm-generic/unistd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 76d946445391..756b013fb832 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) - #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) - #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_map_shadow_stack 453 +__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From fd2ddee727d1ae9296b3875087410cc3698885f0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:42 -0800 Subject: tools headers: Update tools's copy of socket.h header tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: netdev@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-7-namhyung@kernel.org --- tools/perf/trace/beauty/include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 39b74d83c7c4..cfcb7e2c3813 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -383,6 +383,7 @@ struct ucred { #define SOL_MPTCP 284 #define SOL_MCTP 285 #define SOL_SMC 286 +#define SOL_VSOCK 287 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From c23708f37652b74570409725e0bd70a81c275867 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:43 -0800 Subject: tools headers: Update tools's copy of x86/asm headers tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-8-namhyung@kernel.org --- tools/arch/x86/include/asm/cpufeatures.h | 16 +++++++++++++++- tools/arch/x86/include/asm/disabled-features.h | 16 ++++++++++++++-- tools/arch/x86/include/asm/msr-index.h | 23 +++++++++++++++++++---- tools/arch/x86/include/uapi/asm/prctl.h | 12 ++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 798e60b5454b..4af140cf5719 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -308,6 +307,11 @@ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ + +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -380,6 +384,7 @@ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ @@ -438,11 +443,16 @@ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ #define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ #define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + /* * BUG word(s) */ @@ -484,5 +494,9 @@ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index fafe9be7a6f4..702d93fdd10e 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -105,6 +105,18 @@ # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) #endif +#ifdef CONFIG_X86_USER_SHADOW_STACK +#define DISABLE_USER_SHSTK 0 +#else +#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31)) +#endif + +#ifdef CONFIG_X86_KERNEL_IBT +#define DISABLE_IBT 0 +#else +#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -120,7 +132,7 @@ #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING) + DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) #define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 @@ -128,7 +140,7 @@ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ DISABLE_ENQCMD) #define DISABLED_MASK17 0 -#define DISABLED_MASK18 0 +#define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1d111350197f..1d51e1850ed0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -222,6 +222,7 @@ #define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 @@ -553,6 +554,7 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_TW_CFG 0xc0011023 #define MSR_AMD64_DE_CFG 0xc0011029 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 @@ -637,12 +639,21 @@ /* AMD Last Branch Record MSRs */ #define MSR_AMD64_LBR_SELECT 0xc000010e +/* Zen4 */ +#define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 + +/* Fam 19h MSRs */ +#define MSR_F19H_UMC_PERF_CTL 0xc0010800 +#define MSR_F19H_UMC_PERF_CTR 0xc0010801 + +/* Zen 2 */ +#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 +#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 -#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 -#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) - /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 @@ -1112,12 +1123,16 @@ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F -/* AMD-V MSRs */ +/* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + /* Hardware Feedback Interface */ #define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 #define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index e8d7ebbca1a4..384e2cc6ac19 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -23,9 +23,21 @@ #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 +/* Don't use 0x3001-0x3004 because of old glibcs */ + #define ARCH_GET_UNTAG_MASK 0x4001 #define ARCH_ENABLE_TAGGED_ADDR 0x4002 #define ARCH_GET_MAX_TAG_BITS 0x4003 #define ARCH_FORCE_TAGGED_SVA 0x4004 +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +/* ARCH_SHSTK_ features bits */ +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) + #endif /* _ASM_X86_PRCTL_H */ -- cgit v1.2.3 From fad8afdcc18ff72e1d90c558be364f26534b8a5b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:44 -0800 Subject: tools headers: Update tools's copy of arm64/asm headers tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-9-namhyung@kernel.org --- tools/arch/arm64/include/asm/cputype.h | 5 ++++- tools/arch/arm64/include/uapi/asm/kvm.h | 32 +++++++++++++++++++++++++++ tools/arch/arm64/include/uapi/asm/perf_regs.h | 10 +++++---- 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5f6f84837a49..7c7493cb571f 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -79,13 +79,15 @@ #define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A520 0xD80 #define ARM_CPU_PART_CORTEX_A710 0xD47 #define ARM_CPU_PART_CORTEX_A715 0xD4D #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B -#define APM_CPU_PART_POTENZA 0x000 +#define APM_CPU_PART_XGENE 0x000 +#define APM_CPU_VAR_POTENZA 0x00 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 @@ -148,6 +150,7 @@ #define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) #define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715) #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index f7ddd73a8c0f..89d2fc872d9f 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -505,6 +505,38 @@ struct kvm_smccc_filter { #define KVM_HYPERCALL_EXIT_SMC (1U << 0) #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) +/* + * Get feature ID registers userspace writable mask. + * + * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model + * Feature Register 2"): + * + * "The Feature ID space is defined as the System register space in + * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, + * op2=={0-7}." + * + * This covers all currently known R/O registers that indicate + * anything useful feature wise, including the ID registers. + * + * If we ever need to introduce a new range, it will be described as + * such in the range field. + */ +#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ + ({ \ + __u64 __op1 = (op1) & 3; \ + __op1 -= (__op1 == 3); \ + (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ + }) + +#define KVM_ARM_FEATURE_ID_RANGE 0 +#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + +struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h index fd157f46727e..86e556429e0e 100644 --- a/tools/arch/arm64/include/uapi/asm/perf_regs.h +++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h @@ -36,11 +36,13 @@ enum perf_event_arm_regs { PERF_REG_ARM64_LR, PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, + PERF_REG_ARM64_MAX, /* Extended/pseudo registers */ - PERF_REG_ARM64_VG = 46, // SVE Vector Granule - - PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1, - PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1 + PERF_REG_ARM64_VG = 46, /* SVE Vector Granule */ + PERF_REG_ARM64_EXTENDED_MAX }; + +#define PERF_REG_EXTENDED_MASK (1ULL << PERF_REG_ARM64_VG) + #endif /* _ASM_ARM64_PERF_REGS_H */ -- cgit v1.2.3 From e1d7426bb9156d34d385d0516a7309c8f33c55bc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:45 -0800 Subject: tools headers: Update tools's copy of s390/asm headers tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: linux-s390@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-10-namhyung@kernel.org --- tools/arch/s390/include/uapi/asm/kvm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index a73cf01a1606..abe926d43cbe 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc { __u8 reserved[1728]; }; +#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 +#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7 + +#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64 +struct kvm_s390_vm_cpu_uv_feat { + union { + struct { + __u64 : 4; + __u64 ap : 1; /* bit 4 */ + __u64 ap_intr : 1; /* bit 5 */ + __u64 : 58; + }; + __u64 feat; + }; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 -- cgit v1.2.3 From b3b11aed147af8f7c3d79c5b0b7474505d1dde7b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:46 -0800 Subject: tools/perf: Update tools's copy of x86 syscall table tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-11-namhyung@kernel.org --- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 2a62eaf30d69..8cb8bf68721c 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,9 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently -- cgit v1.2.3 From 3483d2440538aa2575c3cddac0b7f42d488570cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:47 -0800 Subject: tools/perf: Update tools's copy of powerpc syscall table tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-12-namhyung@kernel.org --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index e1412519b4ad..7fab411378f2 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -539,3 +539,7 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_ni_syscall +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue -- cgit v1.2.3 From d3968c974a2453de9289bdb9d34130b2ce323628 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:48 -0800 Subject: tools/perf: Update tools's copy of s390 syscall table tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: linux-s390@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-13-namhyung@kernel.org --- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index cc0bc144b661..86fec9b080f6 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -455,3 +455,7 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue -- cgit v1.2.3 From 027905fe5baec70a00e00890e982d035d6c8b6b3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 21 Nov 2023 14:56:49 -0800 Subject: tools/perf: Update tools's copy of mips syscall table tldr; Just FYI, I'm carrying this on the perf tools tree. Full explanation: There used to be no copies, with tools/ code using kernel headers directly. From time to time tools/perf/ broke due to legitimate kernel hacking. At some point Linus complained about such direct usage. Then we adopted the current model. The way these headers are used in perf are not restricted to just including them to compile something. There are sometimes used in scripts that convert defines into string tables, etc, so some change may break one of these scripts, or new MSRs may use some different #define pattern, etc. E.g.: $ ls -1 tools/perf/trace/beauty/*.sh | head -5 tools/perf/trace/beauty/arch_errno_names.sh tools/perf/trace/beauty/drm_ioctl.sh tools/perf/trace/beauty/fadvise.sh tools/perf/trace/beauty/fsconfig.sh tools/perf/trace/beauty/fsmount.sh $ $ tools/perf/trace/beauty/fadvise.sh static const char *fadvise_advices[] = { [0] = "NORMAL", [1] = "RANDOM", [2] = "SEQUENTIAL", [3] = "WILLNEED", [4] = "DONTNEED", [5] = "NOREUSE", }; $ The tools/perf/check-headers.sh script, part of the tools/ build process, points out changes in the original files. So its important not to touch the copies in tools/ when doing changes in the original kernel headers, that will be done later, when check-headers.sh inform about the change to the perf tools hackers. Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20231121225650.390246-14-namhyung@kernel.org --- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 80be0e98ea0c..116ff501bf92 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -367,3 +367,7 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +453 n64 map_shadow_stack sys_map_shadow_stack +454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue -- cgit v1.2.3 From ef5c958090a909c9f2ab717ba6abb86869e42da7 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 21 Nov 2023 19:29:55 +0000 Subject: tools perf: Add arm64 sysreg files to MANIFEST Ian pointed out that source tarballs are incomplete as of commit e2bdd172e665 ("perf build: Generate arm64's sysreg-defs.h and add to include path"), since the source files needed from the kernel tree do not appear in the manifest. Add them. Reported-by: Ian Rogers Fixes: e2bdd172e665 ("perf build: Generate arm64's sysreg-defs.h and add to include path") Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20231121192956.919380-2-oliver.upton@linux.dev Signed-off-by: Namhyung Kim --- tools/perf/MANIFEST | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 1da7f4b91b4f..dc42de1785ce 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,3 +1,5 @@ +arch/arm64/tools/gen-sysreg.awk +arch/arm64/tools/sysreg tools/perf tools/arch tools/scripts -- cgit v1.2.3 From a29ee6aea7030786a63fde0d6d83a8f477b060fb Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 21 Nov 2023 19:29:56 +0000 Subject: perf build: Ensure sysreg-defs Makefile respects output dir Currently the sysreg-defs are written out to the source tree unconditionally, ignoring the specified output directory. Correct the build rule to emit the header to the output directory. Opportunistically reorganize the rules to avoid interleaving with the set of beauty make rules. Reported-by: Ian Rogers Signed-off-by: Oliver Upton Link: https://lore.kernel.org/r/20231121192956.919380-3-oliver.upton@linux.dev Signed-off-by: Namhyung Kim --- tools/arch/arm64/tools/Makefile | 2 +- tools/perf/Makefile.perf | 24 +++++++++++++++--------- tools/perf/util/Build | 2 +- tools/testing/selftests/kvm/Makefile | 5 +++-- 4 files changed, 20 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile index 7f64b8bb5107..7b42feedf647 100644 --- a/tools/arch/arm64/tools/Makefile +++ b/tools/arch/arm64/tools/Makefile @@ -22,7 +22,7 @@ endif arm64_tools_dir = $(top_srcdir)/arch/arm64/tools arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk -arm64_generated_dir = $(top_srcdir)/tools/arch/arm64/include/generated +arm64_generated_dir = $(OUTPUT)arch/arm64/include/generated arm64_sysreg_defs = $(arm64_generated_dir)/asm/sysreg-defs.h all: $(arm64_sysreg_defs) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d88da787e815..058c9aecf608 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -434,6 +434,21 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools +ifneq ($(OUTPUT),) + arm64_gen_sysreg_outdir := $(OUTPUT) +else + arm64_gen_sysreg_outdir := $(CURDIR) +endif + +arm64-sysreg-defs: FORCE + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) + +arm64-sysreg-defs-clean: + $(call QUIET_CLEAN,arm64-sysreg-defs) + $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) O=$(arm64_gen_sysreg_outdir) \ + clean > /dev/null + beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/ linux_uapi_dir := $(srctree)/tools/include/uapi/linux asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic @@ -450,15 +465,6 @@ drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh # Create output directory if not already present _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)') -arm64_gen_sysreg_dir := $(srctree)/tools/arch/arm64/tools - -arm64-sysreg-defs: FORCE - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) - -arm64-sysreg-defs-clean: - $(call QUIET_CLEAN,arm64-sysreg-defs) - $(Q)$(MAKE) -C $(arm64_gen_sysreg_dir) clean > /dev/null - $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ diff --git a/tools/perf/util/Build b/tools/perf/util/Build index fb661c48992f..988473bf907a 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -347,7 +347,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE -CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -I$(srctree)/tools/arch/arm64/include/generated/ +CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/ $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a5963ab9215b..52c59bad7213 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -18,12 +18,13 @@ else endif ifeq ($(ARCH),arm64) -arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/ +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/ CFLAGS += -I$(GEN_HDRS) $(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) + $(MAKE) -C $(arm64_tools_dir) O=$(tools_dir) endif LIBKVM += lib/assert.c -- cgit v1.2.3 From 57686a72da08ae555d93148aa8756b16417a6aff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2023 16:34:09 -0300 Subject: tools: Disable __packed attribute compiler warning due to -Werror=attributes Noticed on several perf tools cross build test containers: [perfbuilder@five ~]$ grep FAIL ~/dm.log/summary 19 10.18 debian:experimental-x-mips : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 20 11.21 debian:experimental-x-mips64 : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 21 11.30 debian:experimental-x-mipsel : FAIL gcc version 12.3.0 (Debian 12.3.0-6) 37 12.07 ubuntu:18.04-x-arm : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) 42 11.91 ubuntu:18.04-x-riscv64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 44 13.17 ubuntu:18.04-x-sh4 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) 45 12.09 ubuntu:18.04-x-sparc64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) [perfbuilder@five ~]$ In file included from util/intel-pt-decoder/intel-pt-pkt-decoder.c:10: /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h: In function 'get_unaligned_le16': /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h:13:29: error: packed attribute causes inefficient alignment for 'x' [-Werror=attributes] 13 | const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ | ^ /tmp/perf-6.6.0-rc1/tools/include/asm-generic/unaligned.h:27:28: note: in expansion of macro '__get_unaligned_t' 27 | return le16_to_cpu(__get_unaligned_t(__le16, p)); | ^~~~~~~~~~~~~~~~~ This comes from the kernel, where the -Wattributes and -Wpacked isn't used, -Wpacked is already disabled, do it for the attributes as well. Fixes: a91c987254651443 ("perf tools: Add get_unaligned_leNN()") Suggested-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/7c5b626c-1de9-4c12-a781-e44985b4a797@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- tools/include/asm-generic/unaligned.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h index 156743d399ae..2fd551915c20 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/asm-generic/unaligned.h @@ -8,6 +8,7 @@ */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" #define __get_unaligned_t(type, ptr) ({ \ const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ -- cgit v1.2.3 From 454723b1615f7423fcba0b8f722cef4992a1846f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 16 Oct 2023 00:25:14 +0200 Subject: rcutorture: add nolibc init support for mips, ppc and rv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use nolibc for all support architectures. Signed-off-by: Thomas Weißschuh Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 212c52ca90b5..f3f867129560 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -67,7 +67,10 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \ + "||__powerpc__||(__riscv && __riscv_xlen == 64)" \ + "||__s390x__||__loongarch__" \ + "\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc -- cgit v1.2.3 From af19a2526cba92082723b98fcf191a595054a952 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 1 Nov 2023 10:30:56 -0700 Subject: rcutorture: Add mid-sized stall to TREE07 There is code in rcu_implicit_dynticks_qs() that checks for the current grace period being halfway to the RCU CPU stall timeout, but rcutorture currently does not test this code. This commit therefore adds a 14-second stall to the TREE07 scenario in order to test this code given the default RCU CPU stall warning timeout of 21 seconds. Signed-off-by: Paul E. McKenney Signed-off-by: Neeraj Upadhyay (AMD) --- tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot index d44609937503..979edbf4c820 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot @@ -1 +1,4 @@ nohz_full=2-9 +rcutorture.stall_cpu=14 +rcutorture.stall_cpu_holdoff=90 +rcutorture.fwd_progress=0 -- cgit v1.2.3 From 280b4e4a9e8009affd8f20ec2d467cb4deb05c1c Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 12 Sep 2023 16:07:59 +1000 Subject: perf tools: Address python 3.6 DeprecationWarning for string scapes Python 3.6 introduced a DeprecationWarning for invalid escape sequences. This is upgraded to a SyntaxWarning in Python 3.12, and will eventually be a syntax error. Fix these now to get ahead of it before it's an error. Signed-off-by: Benjamin Gray Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: Hartley Sweeten Cc: Ian Abbott Cc: Ian Rogers Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kieran Bingham Cc: Mark Rutland Cc: Mykola Lysenko Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Shuah Khan Cc: Todd E Brandt Cc: Tom Rix Cc: linux-doc@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230912060801.95533-6-bgray@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 2 +- tools/perf/scripts/python/arm-cs-trace-disasm.py | 4 ++-- tools/perf/scripts/python/compaction-times.py | 2 +- tools/perf/scripts/python/exported-sql-viewer.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 3c091ab75305..0093c998cb6e 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -83,7 +83,7 @@ def c_len(s: str) -> int: """Return the length of s a C string This doesn't handle all escape characters properly. It first assumes - all \ are for escaping, it then adjusts as it will have over counted + all \\ are for escaping, it then adjusts as it will have over counted \\. The code uses \000 rather than \0 as a terminator as an adjacent number would be folded into a string of \0 (ie. "\0" + "5" doesn't equal a terminator followed by the number 5 but the escape of diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index d59ff53f1d94..de58991c78bb 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -45,8 +45,8 @@ parser = OptionParser(option_list=option_list) # Initialize global dicts and regular expression disasm_cache = dict() cpu_data = dict() -disasm_re = re.compile("^\s*([0-9a-fA-F]+):") -disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") +disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 glb_source_file_name = None diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py index 2560a042dc6f..9401f7c14747 100644 --- a/tools/perf/scripts/python/compaction-times.py +++ b/tools/perf/scripts/python/compaction-times.py @@ -260,7 +260,7 @@ def pr_help(): comm_re = None pid_re = None -pid_regex = "^(\d*)-(\d*)$|^(\d*)$" +pid_regex = r"^(\d*)-(\d*)$|^(\d*)$" opt_proc = popt.DISP_DFL opt_disp = topt.DISP_ALL diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 13f2d8a81610..121cf61ba1b3 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -677,8 +677,8 @@ class CallGraphModelBase(TreeModel): # sqlite supports GLOB (text only) which uses * and ? and is case sensitive if not self.glb.dbref.is_sqlite3: # Escape % and _ - s = value.replace("%", "\%") - s = s.replace("_", "\_") + s = value.replace("%", "\\%") + s = s.replace("_", "\\_") # Translate * and ? into SQL LIKE pattern characters % and _ trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" -- cgit v1.2.3 From b8d78cb2e24d92352878a9f6525aec002c891528 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 23 Nov 2023 02:04:39 +0200 Subject: libbpf: Start v1.4 development cycle Bump libbpf.map to v1.4.0 to start a new libbpf version cycle. Signed-off-by: Eduard Zingerman Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231123000439.12025-1-eddyz87@gmail.com --- tools/lib/bpf/libbpf.map | 3 +++ tools/lib/bpf/libbpf_version.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b52dc28dc8af..91c5aef7dae7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -409,3 +409,6 @@ LIBBPF_1.3.0 { ring__size; ring_buffer__ring; } LIBBPF_1.2.0; + +LIBBPF_1.4.0 { +} LIBBPF_1.3.0; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 290411ddb39e..e783a47da815 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 3 +#define LIBBPF_MINOR_VERSION 4 #endif /* __LIBBPF_VERSION_H */ -- cgit v1.2.3 From f061c9f7d058ffc32de66f2efb3e1c368e305423 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 03:48:31 -0800 Subject: Documentation: Document each netlink family This is a simple script that parses the Netlink YAML spec files (Documentation/netlink/specs/), and generates RST files to be rendered in the Network -> Netlink Specification documentation page. Create a python script that is invoked during 'make htmldocs', reads the YAML specs input file and generate the correspondent RST file. Create a new Documentation/networking/netlink_spec index page, and reference each Netlink RST file that was processed above in this main index.rst file. In case of any exception during the parsing, dump the error and skip the file. Do not regenerate the RST files if the input files (YAML) were not changed in-between invocations. Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao ---- Changelog: V3: * Do not regenerate the RST files if the input files were not changed. In order to do it, a few things changed: - Rely on Makefile more to find what changed, and trigger individual file processing - The script parses file by file now (instead of batches) - Create a new option to generate the index file V2: * Moved the logic from a sphinx extension to a external script * Adjust some formatting as suggested by Donald Hunter and Jakub * Auto generating all the rsts instead of having stubs * Handling error gracefully Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/Makefile | 16 +- Documentation/networking/index.rst | 1 + Documentation/networking/netlink_spec/.gitignore | 1 + Documentation/networking/netlink_spec/readme.txt | 4 + tools/net/ynl/ynl-gen-rst.py | 388 +++++++++++++++++++++++ 5 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/netlink_spec/.gitignore create mode 100644 Documentation/networking/netlink_spec/readme.txt create mode 100755 tools/net/ynl/ynl-gen-rst.py (limited to 'tools') diff --git a/Documentation/Makefile b/Documentation/Makefile index 2f35793acd2a..5c156fbb6cdf 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -97,7 +97,21 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \ fi -htmldocs: +YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst +YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec +YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs +YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py + +YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml)) +YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP)) + +$(YNL_INDEX): $(YNL_RST_FILES) + @$(YNL_TOOL) -o $@ -x + +$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml + @$(YNL_TOOL) -i $< -o $@ + +htmldocs: $(YNL_INDEX) @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 683eb42309cc..cb435c141794 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -55,6 +55,7 @@ Contents: filter generic-hdlc generic_netlink + netlink_spec/index gen_stats gtp ila diff --git a/Documentation/networking/netlink_spec/.gitignore b/Documentation/networking/netlink_spec/.gitignore new file mode 100644 index 000000000000..30d85567b592 --- /dev/null +++ b/Documentation/networking/netlink_spec/.gitignore @@ -0,0 +1 @@ +*.rst diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt new file mode 100644 index 000000000000..6763f99d216c --- /dev/null +++ b/Documentation/networking/netlink_spec/readme.txt @@ -0,0 +1,4 @@ +SPDX-License-Identifier: GPL-2.0 + +This file is populated during the build of the documentation (htmldocs) by the +tools/net/ynl/ynl-gen-rst.py script. diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py new file mode 100755 index 000000000000..b6292109e236 --- /dev/null +++ b/tools/net/ynl/ynl-gen-rst.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# -*- coding: utf-8; mode: python -*- + +""" + Script to auto generate the documentation for Netlink specifications. + + :copyright: Copyright (C) 2023 Breno Leitao + :license: GPL Version 2, June 1991 see linux/COPYING for details. + + This script performs extensive parsing to the Linux kernel's netlink YAML + spec files, in an effort to avoid needing to heavily mark up the original + YAML file. + + This code is split in three big parts: + 1) RST formatters: Use to convert a string to a RST output + 2) Parser helpers: Functions to parse the YAML data structure + 3) Main function and small helpers +""" + +from typing import Any, Dict, List +import os.path +import sys +import argparse +import logging +import yaml + + +SPACE_PER_LEVEL = 4 + + +# RST Formatters +# ============== +def headroom(level: int) -> str: + """Return space to format""" + return " " * (level * SPACE_PER_LEVEL) + + +def bold(text: str) -> str: + """Format bold text""" + return f"**{text}**" + + +def inline(text: str) -> str: + """Format inline text""" + return f"``{text}``" + + +def sanitize(text: str) -> str: + """Remove newlines and multiple spaces""" + # This is useful for some fields that are spread across multiple lines + return str(text).replace("\n", "").strip() + + +def rst_fields(key: str, value: str, level: int = 0) -> str: + """Return a RST formatted field""" + return headroom(level) + f":{key}: {value}" + + +def rst_definition(key: str, value: Any, level: int = 0) -> str: + """Format a single rst definition""" + return headroom(level) + key + "\n" + headroom(level + 1) + str(value) + + +def rst_paragraph(paragraph: str, level: int = 0) -> str: + """Return a formatted paragraph""" + return headroom(level) + paragraph + + +def rst_bullet(item: str, level: int = 0) -> str: + """Return a formatted a bullet""" + return headroom(level) + f" - {item}" + + +def rst_subsection(title: str) -> str: + """Add a sub-section to the document""" + return f"{title}\n" + "-" * len(title) + + +def rst_subsubsection(title: str) -> str: + """Add a sub-sub-section to the document""" + return f"{title}\n" + "~" * len(title) + + +def rst_section(title: str) -> str: + """Add a section to the document""" + return f"\n{title}\n" + "=" * len(title) + + +def rst_subtitle(title: str) -> str: + """Add a subtitle to the document""" + return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n" + + +def rst_title(title: str) -> str: + """Add a title to the document""" + return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n" + + +def rst_list_inline(list_: List[str], level: int = 0) -> str: + """Format a list using inlines""" + return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]" + + +def rst_header() -> str: + """The headers for all the auto generated RST files""" + lines = [] + + lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0")) + lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n")) + + return "\n".join(lines) + + +def rst_toctree(maxdepth: int = 2) -> str: + """Generate a toctree RST primitive""" + lines = [] + + lines.append(".. toctree::") + lines.append(f" :maxdepth: {maxdepth}\n\n") + + return "\n".join(lines) + + +# Parsers +# ======= + + +def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str: + """Parse 'multicast' group list and return a formatted string""" + lines = [] + for group in mcast_group: + lines.append(rst_bullet(group["name"])) + + return "\n".join(lines) + + +def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: + """Parse 'do' section and return a formatted string""" + lines = [] + for key in do_dict.keys(): + lines.append(rst_paragraph(bold(key), level + 1)) + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + + return "\n".join(lines) + + +def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: + """Parse 'attributes' section""" + if "attributes" not in attrs: + return "" + lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)] + + return "\n".join(lines) + + +def parse_operations(operations: List[Dict[str, Any]]) -> str: + """Parse operations block""" + preprocessed = ["name", "doc", "title", "do", "dump"] + lines = [] + + for operation in operations: + lines.append(rst_section(operation["name"])) + lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + + for key in operation.keys(): + if key in preprocessed: + # Skip the special fields + continue + lines.append(rst_fields(key, operation[key], 0)) + + if "do" in operation: + lines.append(rst_paragraph(":do:", 0)) + lines.append(parse_do(operation["do"], 0)) + if "dump" in operation: + lines.append(rst_paragraph(":dump:", 0)) + lines.append(parse_do(operation["dump"], 0)) + + # New line after fields + lines.append("\n") + + return "\n".join(lines) + + +def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: + """Parse a list of entries""" + lines = [] + for entry in entries: + if isinstance(entry, dict): + # entries could be a list or a dictionary + lines.append( + rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level) + ) + elif isinstance(entry, list): + lines.append(rst_list_inline(entry, level)) + else: + lines.append(rst_bullet(inline(sanitize(entry)), level)) + + lines.append("\n") + return "\n".join(lines) + + +def parse_definitions(defs: Dict[str, Any]) -> str: + """Parse definitions section""" + preprocessed = ["name", "entries", "members"] + ignored = ["render-max"] # This is not printed + lines = [] + + for definition in defs: + lines.append(rst_section(definition["name"])) + for k in definition.keys(): + if k in preprocessed + ignored: + continue + lines.append(rst_fields(k, sanitize(definition[k]), 0)) + + # Field list needs to finish with a new line + lines.append("\n") + if "entries" in definition: + lines.append(rst_paragraph(":entries:", 0)) + lines.append(parse_entries(definition["entries"], 1)) + if "members" in definition: + lines.append(rst_paragraph(":members:", 0)) + lines.append(parse_entries(definition["members"], 1)) + + return "\n".join(lines) + + +def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: + """Parse attribute from attribute-set""" + preprocessed = ["name", "type"] + ignored = ["checks"] + lines = [] + + for entry in entries: + lines.append(rst_section(entry["name"])) + for attr in entry["attributes"]: + type_ = attr.get("type") + attr_line = bold(attr["name"]) + if type_: + # Add the attribute type in the same line + attr_line += f" ({inline(type_)})" + + lines.append(rst_subsubsection(attr_line)) + + for k in attr.keys(): + if k in preprocessed + ignored: + continue + lines.append(rst_fields(k, sanitize(attr[k]), 2)) + lines.append("\n") + + return "\n".join(lines) + + +def parse_yaml(obj: Dict[str, Any]) -> str: + """Format the whole YAML into a RST string""" + lines = [] + + # Main header + + lines.append(rst_header()) + + title = f"Family ``{obj['name']}`` netlink specification" + lines.append(rst_title(title)) + lines.append(rst_paragraph(".. contents::\n")) + + if "doc" in obj: + lines.append(rst_subtitle("Summary")) + lines.append(rst_paragraph(obj["doc"], 0)) + + # Operations + if "operations" in obj: + lines.append(rst_subtitle("Operations")) + lines.append(parse_operations(obj["operations"]["list"])) + + # Multicast groups + if "mcast-groups" in obj: + lines.append(rst_subtitle("Multicast groups")) + lines.append(parse_mcast_group(obj["mcast-groups"]["list"])) + + # Definitions + if "definitions" in obj: + lines.append(rst_subtitle("Definitions")) + lines.append(parse_definitions(obj["definitions"])) + + # Attributes set + if "attribute-sets" in obj: + lines.append(rst_subtitle("Attribute sets")) + lines.append(parse_attr_sets(obj["attribute-sets"])) + + return "\n".join(lines) + + +# Main functions +# ============== + + +def parse_arguments() -> argparse.Namespace: + """Parse arguments from user""" + parser = argparse.ArgumentParser(description="Netlink RST generator") + + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument("-o", "--output", help="Output file name") + + # Index and input are mutually exclusive + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-x", "--index", action="store_true", help="Generate the index page" + ) + group.add_argument("-i", "--input", help="YAML file name") + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + if args.input and not os.path.isfile(args.input): + logging.warning("%s is not a valid file.", args.input) + sys.exit(-1) + + if not args.output: + logging.error("No output file specified.") + sys.exit(-1) + + if os.path.isfile(args.output): + logging.debug("%s already exists. Overwriting it.", args.output) + + return args + + +def parse_yaml_file(filename: str) -> str: + """Transform the YAML specified by filename into a rst-formmated string""" + with open(filename, "r", encoding="utf-8") as spec_file: + yaml_data = yaml.safe_load(spec_file) + content = parse_yaml(yaml_data) + + return content + + +def write_to_rstfile(content: str, filename: str) -> None: + """Write the generated content into an RST file""" + logging.debug("Saving RST file to %s", filename) + + with open(filename, "w", encoding="utf-8") as rst_file: + rst_file.write(content) + + +def generate_main_index_rst(output: str) -> None: + """Generate the `networking_spec/index` content and write to the file""" + lines = [] + + lines.append(rst_header()) + lines.append(rst_title("Netlink Specification")) + lines.append(rst_toctree(1)) + + index_dir = os.path.dirname(output) + logging.debug("Looking for .rst files in %s", index_dir) + for filename in os.listdir(index_dir): + if not filename.endswith(".rst") or filename == "index.rst": + continue + lines.append(f" {filename.replace('.rst', '')}\n") + + logging.debug("Writing an index file at %s", output) + write_to_rstfile("".join(lines), output) + + +def main() -> None: + """Main function that reads the YAML files and generates the RST files""" + + args = parse_arguments() + + if args.input: + logging.debug("Parsing %s", args.input) + try: + content = parse_yaml_file(os.path.join(args.input)) + except Exception as exception: + logging.warning("Failed to parse %s.", args.input) + logging.warning(exception) + sys.exit(-1) + + write_to_rstfile(content, args.output) + + if args.index: + # Generate the index RST file + generate_main_index_rst(args.output) + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From 2afae08c9dcb8ac648414277cec70c2fe6a34d9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Nov 2023 19:59:36 -0800 Subject: bpf: Validate global subprogs lazily Slightly change BPF verifier logic around eagerness and order of global subprog validation. Instead of going over every global subprog eagerly and validating it before main (entry) BPF program is verified, turn it around. Validate main program first, mark subprogs that were called from main program for later verification, but otherwise assume it is valid. Afterwards, go over marked global subprogs and validate those, potentially marking some more global functions as being called. Continue this process until all (transitively) callable global subprogs are validated. It's a BFS traversal at its heart and will always converge. This is an important change because it allows to feature-gate some subprograms that might not be verifiable on some older kernel, depending on supported set of features. E.g., at some point, global functions were allowed to accept a pointer to memory, which size is identified by user-provided type. Unfortunately, older kernels don't support this feature. With BPF CO-RE approach, the natural way would be to still compile BPF object file once and guard calls to this global subprog with some CO-RE check or using .rodata variables. That's what people do to guard usage of new helpers or kfuncs, and any other new BPF-side feature that might be missing on old kernels. That's currently impossible to do with global subprogs, unfortunately, because they are eagerly and unconditionally validated. This patch set aims to change this, so that in the future when global funcs gain new features, those can be guarded using BPF CO-RE techniques in the same fashion as any other new kernel feature. Two selftests had to be adjusted in sync with these changes. test_global_func12 relied on eager global subprog validation failing before main program failure is detected (unknown return value). Fix by making sure that main program is always valid. verifier_subprog_precision's parent_stack_slot_precise subtest relied on verifier checkpointing heuristic to do a checkpoint at instruction #5, but that's no longer true because we don't have enough jumps validated before reaching insn #5 due to global subprogs being validated later. Other than that, no changes, as one would expect. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231124035937.403208-3-andrii@kernel.org --- include/linux/bpf.h | 2 + kernel/bpf/verifier.c | 48 +++++++++++++++++++--- .../selftests/bpf/progs/test_global_func12.c | 4 +- .../bpf/progs/verifier_subprog_precision.c | 4 +- 4 files changed, 48 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 258ba232e302..eb447b0a9423 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1347,6 +1347,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) struct bpf_func_info_aux { u16 linkage; bool unreliable; + bool called : 1; + bool verified : 1; }; enum bpf_jit_poke_reason { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2939ebf2638..8e7b6072e3f4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -434,6 +434,11 @@ static const char *subprog_name(const struct bpf_verifier_env *env, int subprog) return btf_type_name(env->prog->aux->btf, info->type_id); } +static struct bpf_func_info_aux *subprog_aux(const struct bpf_verifier_env *env, int subprog) +{ + return &env->prog->aux->func_info_aux[subprog]; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); @@ -9290,6 +9295,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, verbose(env, "Func#%d ('%s') is global and assumed valid.\n", subprog, sub_name); + /* mark global subprog for verifying after main prog */ + subprog_aux(env, subprog)->called = true; clear_caller_saved_regs(env, caller->regs); /* All global functions return a 64-bit SCALAR_VALUE */ @@ -19873,8 +19880,11 @@ out: return ret; } -/* Verify all global functions in a BPF program one by one based on their BTF. - * All global functions must pass verification. Otherwise the whole program is rejected. +/* Lazily verify all global functions based on their BTF, if they are called + * from main BPF program or any of subprograms transitively. + * BPF global subprogs called from dead code are not validated. + * All callable global functions must pass verification. + * Otherwise the whole program is rejected. * Consider: * int bar(int); * int foo(int f) @@ -19893,14 +19903,26 @@ out: static int do_check_subprogs(struct bpf_verifier_env *env) { struct bpf_prog_aux *aux = env->prog->aux; - int i, ret; + struct bpf_func_info_aux *sub_aux; + int i, ret, new_cnt; if (!aux->func_info) return 0; + /* exception callback is presumed to be always called */ + if (env->exception_callback_subprog) + subprog_aux(env, env->exception_callback_subprog)->called = true; + +again: + new_cnt = 0; for (i = 1; i < env->subprog_cnt; i++) { - if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) + if (!subprog_is_global(env, i)) + continue; + + sub_aux = subprog_aux(env, i); + if (!sub_aux->called || sub_aux->verified) continue; + env->insn_idx = env->subprog_info[i].start; WARN_ON_ONCE(env->insn_idx == 0); ret = do_check_common(env, i, env->exception_callback_subprog == i); @@ -19910,7 +19932,21 @@ static int do_check_subprogs(struct bpf_verifier_env *env) verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n", i, subprog_name(env, i)); } + + /* We verified new global subprog, it might have called some + * more global subprogs that we haven't verified yet, so we + * need to do another pass over subprogs to verify those. + */ + sub_aux->verified = true; + new_cnt++; } + + /* We can't loop forever as we verify at least one global subprog on + * each pass. + */ + if (new_cnt) + goto again; + return 0; } @@ -20556,8 +20592,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = do_check_subprogs(env); - ret = ret ?: do_check_main(env); + ret = do_check_main(env); + ret = ret ?: do_check_subprogs(env); if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) ret = bpf_prog_offload_finalize(env); diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c index 7f159d83c6f6..6e03d42519a6 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func12.c +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -19,5 +19,7 @@ int global_func12(struct __sk_buff *skb) { const struct S s = {.x = skb->len }; - return foo(&s); + foo(&s); + + return 1; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index f61d623b1ce8..b5efcaeaa1ae 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -370,12 +370,10 @@ __naked int parent_stack_slot_precise(void) SEC("?raw_tp") __success __log_level(2) __msg("9: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: last_idx 9 first_idx 0") __msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 5 first_idx 0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") -- cgit v1.2.3 From e8a339b5235e294f29153149ea7cf26a9a87dbea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Nov 2023 19:59:37 -0800 Subject: selftests/bpf: Add lazy global subprog validation tests Add a few test that validate BPF verifier's lazy approach to validating global subprogs. We check that global subprogs that are called transitively through another global subprog is validated. We also check that invalid global subprog is not validated, if it's not called from the main program. And we also check that main program is always validated first, before any of the subprogs. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231124035937.403208-4-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_global_subprogs.c | 92 ++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_global_subprogs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 5cfa7a6316b6..8d746642cbd7 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -25,6 +25,7 @@ #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_global_subprogs.skel.h" #include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" @@ -134,6 +135,7 @@ void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_acces void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c new file mode 100644 index 000000000000..a0a5efd1caa1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include +#include "bpf_misc.h" + +int arr[1]; +int unkn_idx; + +__noinline long global_bad(void) +{ + return arr[unkn_idx]; /* BOOM */ +} + +__noinline long global_good(void) +{ + return arr[0]; +} + +__noinline long global_calls_bad(void) +{ + return global_good() + global_bad() /* does BOOM indirectly */; +} + +__noinline long global_calls_good_only(void) +{ + return global_good(); +} + +SEC("?raw_tp") +__success __log_level(2) +/* main prog is validated completely first */ +__msg("('global_calls_good_only') is global and assumed valid.") +__msg("1: (95) exit") +/* eventually global_good() is transitively validated as well */ +__msg("Validating global_good() func") +__msg("('global_good') is safe for any args that match its prototype") +int chained_global_func_calls_success(void) +{ + return global_calls_good_only(); +} + +SEC("?raw_tp") +__failure __log_level(2) +/* main prog validated successfully first */ +__msg("1: (95) exit") +/* eventually we validate global_bad() and fail */ +__msg("Validating global_bad() func") +__msg("math between map_value pointer and register") /* BOOM */ +int chained_global_func_calls_bad(void) +{ + return global_calls_bad(); +} + +/* do out of bounds access forcing verifier to fail verification if this + * global func is called + */ +__noinline int global_unsupp(const int *mem) +{ + if (!mem) + return 0; + return mem[100]; /* BOOM */ +} + +const volatile bool skip_unsupp_global = true; + +SEC("?raw_tp") +__success +int guarded_unsupp_global_called(void) +{ + if (!skip_unsupp_global) + return global_unsupp(NULL); + return 0; +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("Func#1 ('global_unsupp') is global and assumed valid.") +__msg("Validating global_unsupp() func#1...") +__msg("value is outside of the allowed memory range") +int unguarded_unsupp_global_called(void) +{ + int x = 0; + + return global_unsupp(&x); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 8e3707975e04e432f132955652fc77e9ff82f31d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 09:33:23 -0800 Subject: tools: ynl-gen: always append ULL/LL to range types 32bit builds generate the following warning when we use a u32-max in range validation: warning: decimal constant 4294967295 is between LONG_MAX and ULONG_MAX. For C99 that means long long, C90 compilers are very likely to produce unsigned long (and a warning) here The range values are u64, slap ULL/LL on all of them just to avoid such noise. There's currently no code using full range validation, but it will matter in the upcoming page-pool introspection. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/ynl-gen-c.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 3bd6b928c14f..fe5ca7fbe011 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2070,12 +2070,13 @@ def print_kernel_policy_ranges(family, cw): first = False sign = '' if attr.type[0] == 'u' else '_signed' + suffix = 'ULL' if attr.type[0] == 'u' else 'LL' cw.block_start(line=f'static const struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') members = [] if 'min' in attr.checks: - members.append(('min', attr.get_limit('min'))) + members.append(('min', str(attr.get_limit('min')) + suffix)) if 'max' in attr.checks: - members.append(('max', attr.get_limit('max'))) + members.append(('max', str(attr.get_limit('max')) + suffix)) cw.write_struct_init(members) cw.block_end(line=';') cw.nl() -- cgit v1.2.3 From 19ed9b3d7a77c6ac3927fe05f4eacfa056b43a48 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:08:44 -0800 Subject: tools: ynl-get: use family c-name If a new family is ever added with a dash in the name the C codegen will break. Make sure we use the "safe" form of the name consistently. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/ynl-gen-c.py | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index fe5ca7fbe011..b3438e96fde6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -67,9 +67,9 @@ class Type(SpecAttr): if 'nested-attributes' in attr: self.nested_attrs = attr['nested-attributes'] if self.nested_attrs == family.name: - self.nested_render_name = f"{family.name}" + self.nested_render_name = c_lower(f"{family.name}") else: - self.nested_render_name = f"{family.name}_{c_lower(self.nested_attrs)}" + self.nested_render_name = c_lower(f"{family.name}_{self.nested_attrs}") if self.nested_attrs in self.family.consts: self.nested_struct_type = 'struct ' + self.nested_render_name + '_' @@ -335,7 +335,7 @@ class TypeScalar(Type): maybe_enum = not self.is_bitfield and 'enum' in self.attr if maybe_enum and self.family.consts[self.attr['enum']].enum_name: - self.type_name = f"enum {self.family.name}_{c_lower(self.attr['enum'])}" + self.type_name = c_lower(f"enum {self.family.name}_{self.attr['enum']}") elif self.is_auto_scalar: self.type_name = '__' + self.type[0] + '64' else: @@ -685,9 +685,9 @@ class Struct: self.nested = type_list is None if family.name == c_lower(space_name): - self.render_name = f"{family.name}" + self.render_name = c_lower(family.name) else: - self.render_name = f"{family.name}_{c_lower(space_name)}" + self.render_name = c_lower(family.name + '-' + space_name) self.struct_name = 'struct ' + self.render_name if self.nested and space_name in family.consts: self.struct_name += '_' @@ -841,7 +841,7 @@ class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): super().__init__(family, yaml, req_value, rsp_value) - self.render_name = family.name + '_' + c_lower(self.name) + self.render_name = c_lower(family.name + '_' + self.name) self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ ('dump' in yaml and 'request' in yaml['dump']) @@ -1431,7 +1431,7 @@ def op_prefix(ri, direction, deref=False): suffix += '_rsp' suffix += '_dump' if deref else '_list' - return f"{ri.family['name']}{suffix}" + return f"{ri.family.c_name}{suffix}" def type_name(ri, direction, deref=False): @@ -1497,11 +1497,11 @@ def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): def put_op_name_fwd(family, cw): - cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'], suffix=';') + cw.write_func_prot('const char *', f'{family.c_name}_op_str', ['int op'], suffix=';') def put_op_name(family, cw): - map_name = f'{family.name}_op_strmap' + map_name = f'{family.c_name}_op_strmap' cw.block_start(line=f"static const char * const {map_name}[] =") for op_name, op in family.msgs.items(): if op.rsp_value: @@ -1518,7 +1518,7 @@ def put_op_name(family, cw): cw.block_end(line=';') cw.nl() - _put_enum_to_str_helper(cw, family.name + '_op', map_name, 'op') + _put_enum_to_str_helper(cw, family.c_name + '_op', map_name, 'op') def put_enum_to_str_fwd(family, cw, enum): @@ -1840,7 +1840,7 @@ def _print_type(ri, direction, struct): if ri.op_mode == 'dump': suffix += '_dump' - ri.cw.block_start(line=f"struct {ri.family['name']}{suffix}") + ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}") meta_started = False for _, attr in struct.member_list(): @@ -2106,7 +2106,7 @@ def print_kernel_op_table_fwd(family, cw, terminate): cnt = len(family.ops) qual = 'static const' if not exported else 'const' - line = f"{qual} struct {struct_type} {family.name}_nl_ops[{cnt}]" + line = f"{qual} struct {struct_type} {family.c_name}_nl_ops[{cnt}]" if terminate: cw.p(f"extern {line};") else: @@ -2249,7 +2249,7 @@ def print_kernel_mcgrp_src(family, cw): if not family.mcgrps['list']: return - cw.block_start('static const struct genl_multicast_group ' + family.name + '_nl_mcgrps[] =') + cw.block_start('static const struct genl_multicast_group ' + family.c_name + '_nl_mcgrps[] =') for grp in family.mcgrps['list']: name = grp['name'] grp_id = c_upper(f"{family.name}-nlgrp-{name}") @@ -2262,7 +2262,7 @@ def print_kernel_family_struct_hdr(family, cw): if not kernel_can_gen_family_struct(family): return - cw.p(f"extern struct genl_family {family.name}_nl_family;") + cw.p(f"extern struct genl_family {family.c_name}_nl_family;") cw.nl() @@ -2277,14 +2277,14 @@ def print_kernel_family_struct_src(family, cw): cw.p('.parallel_ops\t= true,') cw.p('.module\t\t= THIS_MODULE,') if family.kernel_policy == 'per-op': - cw.p(f'.ops\t\t= {family.name}_nl_ops,') - cw.p(f'.n_ops\t\t= ARRAY_SIZE({family.name}_nl_ops),') + cw.p(f'.ops\t\t= {family.c_name}_nl_ops,') + cw.p(f'.n_ops\t\t= ARRAY_SIZE({family.c_name}_nl_ops),') elif family.kernel_policy == 'split': - cw.p(f'.split_ops\t= {family.name}_nl_ops,') - cw.p(f'.n_split_ops\t= ARRAY_SIZE({family.name}_nl_ops),') + cw.p(f'.split_ops\t= {family.c_name}_nl_ops,') + cw.p(f'.n_split_ops\t= ARRAY_SIZE({family.c_name}_nl_ops),') if family.mcgrps['list']: - cw.p(f'.mcgrps\t\t= {family.name}_nl_mcgrps,') - cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.name}_nl_mcgrps),') + cw.p(f'.mcgrps\t\t= {family.c_name}_nl_mcgrps,') + cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') cw.block_end(';') @@ -2294,7 +2294,7 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): if obj[enum_name]: start_line = 'enum ' + c_lower(obj[enum_name]) elif ckey and ckey in obj: - start_line = 'enum ' + family.name + '_' + c_lower(obj[ckey]) + start_line = 'enum ' + family.c_name + '_' + c_lower(obj[ckey]) cw.block_start(line=start_line) @@ -2478,7 +2478,7 @@ def render_user_family(family, cw, prototype): cw.nl() cw.block_start(f'{symbol} = ') - cw.p(f'.name\t\t= "{family.name}",') + cw.p(f'.name\t\t= "{family.c_name}",') if family.ntfs: cw.p(f".ntf_info\t= {family['name']}_ntf_info,") cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") @@ -2560,7 +2560,7 @@ def main(): render_uapi(parsed, cw) return - hdr_prot = f"_LINUX_{parsed.name.upper()}_GEN_H" + hdr_prot = f"_LINUX_{parsed.c_name.upper()}_GEN_H" if args.header: cw.p('#ifndef ' + hdr_prot) cw.p('#define ' + hdr_prot) -- cgit v1.2.3 From 30c90200153430915a4c4aaaca7437d2592e6ed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:10:50 -0800 Subject: tools: ynl-gen: use enum name from the spec The enum name used for id-to-str table does not handle the enum-name override in the spec correctly. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/ynl-gen-c.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index b3438e96fde6..cbbda276f6d1 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -755,11 +755,17 @@ class EnumSet(SpecEnumSet): if 'enum-name' in yaml: if yaml['enum-name']: self.enum_name = 'enum ' + c_lower(yaml['enum-name']) + self.user_type = self.enum_name else: self.enum_name = None else: self.enum_name = 'enum ' + self.render_name + if self.enum_name: + self.user_type = self.enum_name + else: + self.user_type = 'int' + self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") super().__init__(family, yaml) @@ -1483,8 +1489,8 @@ def put_typol(cw, struct): def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): args = [f'int {arg_name}'] - if enum and not ('enum-name' in enum and not enum['enum-name']): - args = [f'enum {render_name} {arg_name}'] + if enum: + args = [enum.user_type + ' ' + arg_name] cw.write_func_prot('const char *', f'{render_name}_str', args) cw.block_start() if enum and enum.type == 'flags': @@ -1522,9 +1528,7 @@ def put_op_name(family, cw): def put_enum_to_str_fwd(family, cw, enum): - args = [f'enum {enum.render_name} value'] - if 'enum-name' in enum and not enum['enum-name']: - args = ['int value'] + args = [enum.user_type + ' value'] cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';') -- cgit v1.2.3 From 72b4ca7e993e94f09bcf6d19fc385a2e8060c71f Mon Sep 17 00:00:00 2001 From: Nick Forrington Date: Thu, 2 Nov 2023 16:22:24 +0000 Subject: perf test: Remove atomics from test_loop to avoid test failures The current use of atomics can lead to test failures, as tests (such as tests/shell/record.sh) search for samples with "test_loop" as the top-most stack frame, but find frames related to the atomic operation (e.g. __aarch64_ldadd4_relax). This change simply removes the "count" variable, as it is not necessary. Fixes: 1962ab6f6e0b39e4 ("perf test workload thloop: Make count increments atomic") Reviewed-by: James Clark Signed-off-by: Nick Forrington Acked-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231102162225.50028-1-nick.forrington@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/workloads/thloop.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c index af05269c2eb8..457b29f91c3e 100644 --- a/tools/perf/tests/workloads/thloop.c +++ b/tools/perf/tests/workloads/thloop.c @@ -7,7 +7,6 @@ #include "../tests.h" static volatile sig_atomic_t done; -static volatile unsigned count; /* We want to check this symbol in perf report */ noinline void test_loop(void); @@ -19,8 +18,7 @@ static void sighandler(int sig __maybe_unused) noinline void test_loop(void) { - while (!done) - __atomic_fetch_add(&count, 1, __ATOMIC_RELAXED); + while (!done); } static void *thfunc(void *arg) -- cgit v1.2.3 From b457c526072aa8ab312517010837b06d38b9bb66 Mon Sep 17 00:00:00 2001 From: Paran Lee Date: Tue, 21 Nov 2023 07:32:19 +0900 Subject: perf script python: Fail check on dynamic allocation Add PyList_New() Fail check in get_field_numeric_entry() function and dynamic allocation checking for set_regs_in_dict(), python_start_script(). Reviewed-by: Adrian Hunter Reviewed-by: MichelleJin Signed-off-by: Paran Lee Cc: Alexander Shishkin Cc: Austin Kim Cc: Honggyu Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231120223218.9036-1-p4ranlee@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 94312741443a..860e1837ba96 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -353,6 +353,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event, if (is_array) { list = PyList_New(field->arraylen); + if (!list) + Py_FatalError("couldn't create Python list"); item_size = field->size / field->arraylen; n_items = field->arraylen; } else { @@ -754,7 +756,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } -static void set_regs_in_dict(PyObject *dict, +static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) { @@ -770,6 +772,8 @@ static void set_regs_in_dict(PyObject *dict, */ int size = __sw_hweight64(attr->sample_regs_intr) * 28; char *bf = malloc(size); + if (!bf) + return -1; regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); @@ -781,6 +785,8 @@ static void set_regs_in_dict(PyObject *dict, pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); free(bf); + + return 0; } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, @@ -920,7 +926,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->cyc_cnt)); } - set_regs_in_dict(dict, sample, evsel); + if (set_regs_in_dict(dict, sample, evsel)) + Py_FatalError("Failed to setting regs in dict"); return dict; } @@ -1918,12 +1925,18 @@ static int python_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; #if PY_MAJOR_VERSION < 3 command_line = malloc((argc + 1) * sizeof(const char *)); + if (!command_line) + return -1; + command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; PyImport_AppendInittab(name, initperf_trace_context); #else command_line = malloc((argc + 1) * sizeof(wchar_t *)); + if (!command_line) + return -1; + command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); -- cgit v1.2.3 From cd38d6b5fa2dfc3beb7311f0b373e3141620c76b Mon Sep 17 00:00:00 2001 From: zhaimingbing Date: Mon, 20 Nov 2023 19:23:56 +0800 Subject: perf script perl: Fail check on dynamic allocation Return ENOMEM when dynamic allocation failed. Reviewed-by: Ian Rogers Signed-off-by: zhaimingbing Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231120112356.8652-1-zhaimingbing@cmss.chinamobile.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-perl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 603091317bed..b072ac5d3bc2 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -490,6 +490,9 @@ static int perl_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; command_line = malloc((argc + 2) * sizeof(const char *)); + if (!command_line) + return -ENOMEM; + command_line[0] = ""; command_line[1] = script; for (i = 2; i < argc + 2; i++) -- cgit v1.2.3 From 697579629f850d8f863147351e12e74c50114a8a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 7 Nov 2023 10:40:20 -0800 Subject: perf test: Basic branch counter support Add a basic test for the branch counter feature. The test verifies that - The new filter can be successfully applied on the supported platforms. - The counter value can be outputted via the perf report -D Signed-off-by: Kan Liang Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tinghao Zhang Link: https://lore.kernel.org/r/20231107184020.1497571-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 29443b8e8876..c74412c68e65 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -12,6 +12,9 @@ err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog="perf test -w thloop" testsym="test_loop" +cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" +br_cntr_file="/caps/branch_counter_nr" +br_cntr_output="branch stack counters" cleanup() { rm -rf "${perfdata}" @@ -155,10 +158,37 @@ test_workload() { echo "Basic target workload test [Success]" } +test_branch_counter() { + echo "Basic branch counter test" + # Check if the branch counter feature is supported + for dir in $cpu_pmu_dir + do + if [ ! -e "$dir$br_cntr_file" ] + then + echo "branch counter feature not supported on all core PMUs ($dir) [Skipped]" + return + fi + done + if ! perf record -o "${perfdata}" -j any,counter ${testprog} 2> /dev/null + then + echo "Basic branch counter test [Failed record]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -D -q | grep -q "$br_cntr_output" + then + echo "Basic branch record test [Failed missing output]" + err=1 + return + fi + echo "Basic branch counter test [Success]" +} + test_per_thread test_register_capture test_system_wide test_workload +test_branch_counter cleanup exit $err -- cgit v1.2.3 From 2dbba30fd69b604802a9535b74bddb5bcca23793 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 1 Sep 2023 14:37:15 +0100 Subject: perf cs-etm: Bump minimum OpenCSD version to ensure a bugfix is present Since commit d927ef5004ef ("perf cs-etm: Add exception level consistency check"), the exception that was added to Perf will be triggered unless the following bugfix from OpenCSD is present: - _Version 1.2.1_: - __Bugfix__: ETM4x / ETE - output of context elements to client can in some circumstances be delayed until after subsequent atoms have been processed leading to incorrect memory decode access via the client callbacks. Fixed to flush context elements immediately they are committed. Rather than remove the assert and silently fail, just increase the minimum version requirement to avoid hard to debug issues and regressions. Reviewed-by: Ian Rogers Signed-off-by: James Clark Tested-by: Leo Yan Cc: John Garry Cc: Mike Leach Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230901133716.677499-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-libopencsd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index eb6303ff446e..4cfcef9da3e4 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) +#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.1.1 is required" +#error "OpenCSD >= 1.2.1 is required" #endif int main(void) -- cgit v1.2.3 From 26218331f49c858d52e60418cf3cef4c3fa6cf2e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sat, 14 Oct 2023 15:45:12 +0800 Subject: perf auxtrace: Add 'T' itrace option for timestamp trace An AUX trace can contain timestamp, but in some situations, the hardware trace module (e.g. Arm CoreSight) cannot decide the traced timestamp is the same source with CPU's time, thus the decoder can not use the timestamp trace for samples. This patch introduces 'T' itrace option. If users know the platforms they are working on have the same time counter with CPUs, users can use this new option to tell a decoder for using timestamp trace as kernel time. Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231014074513.1668000-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 3 +++ tools/perf/util/auxtrace.h | 3 +++ 3 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index a97f95825b14..19cc179be9a7 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -25,6 +25,7 @@ q quicker (less detailed) decoding A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) + T use the timestamp trace as kernel time The default is all events i.e. the same as --itrace=iybxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746..f528c4364d23 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1638,6 +1638,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'Z': synth_opts->timeless_decoding = true; break; + case 'T': + synth_opts->use_timestamp = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 29eb82dff574..55702215a82d 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -99,6 +99,7 @@ enum itrace_period_type { * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps + * @use_timestamp: use the timestamp trace as kernel time * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -146,6 +147,7 @@ struct itrace_synth_opts { bool remote_access; bool mem; bool timeless_decoding; + bool use_timestamp; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; @@ -678,6 +680,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " q: quicker (less detailed) decoding\n" \ " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ +" T: use the timestamp trace as kernel time\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is iybxwpe or cewp\n" -- cgit v1.2.3 From a4271827e609d30b7255aa7e4c453a8f3fe36a7b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sat, 14 Oct 2023 15:45:13 +0800 Subject: perf cs-etm: Enable itrace option 'T' Prior to Armv8.4, the feature FEAT_TRF is not supported by Arm CPUs. Consequently, the sysfs node 'ts_source' will not be set as 1 by the CoreSight ETM driver. On the other hand, the perf tool relies on the 'ts_source' node to determine whether the kernel timestamp is traced. Since the 'ts_source' is not set for Arm CPUs prior to Armv8.4, platforms in this case cannot utilize the traced timestamp as the kernel time. This patch enables the 'T' itrace option, which forcibly utilizes the traced timestamp as the kernel time. If users are aware that their working platform's Arm CoreSight shares the same counter with the kernel time, they can specify 'T' option to decode the traced timestamp as the kernel time. An usage example is: # perf record -e cs_etm// -- test_program # perf script --itrace=i10ibT # perf report --itrace=i10ibT Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231014074513.1668000-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a9873d14c632..d65d7485886c 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3346,12 +3346,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; - /* Use virtual timestamps if all ETMs report ts_source = 1 */ - etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + if (etm->synth_opts.use_timestamp) + /* + * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, + * therefore the decoder cannot know if the timestamp trace is + * same with the kernel time. + * + * If a user has knowledge for the working platform and can + * specify itrace option 'T' to tell decoder to forcely use the + * traced timestamp as the kernel time. + */ + etm->has_virtual_ts = true; + else + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); if (!etm->has_virtual_ts) ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" - "The time field of the samples will not be set accurately.\n\n"); + "The time field of the samples will not be set accurately.\n" + "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" + "you can specify the itrace option 'T' for timestamp decoding\n" + "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); etm->auxtrace.process_event = cs_etm__process_event; etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; -- cgit v1.2.3 From a24d9d9dc096fc0d0bd85302c9a4fe4fe3b1107b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Nov 2023 20:29:22 -0800 Subject: perf parse-events: Make legacy events lower priority than sysfs/JSON The perf tool has previously made legacy events the priority so with or without a PMU the legacy event would be opened: $ perf stat -e cpu-cycles,cpu/cpu-cycles/ true Using CPUID GenuineIntel-6-8D-1 intel_pt default config: tsc,mtc,mtc_period=3,psb_period=3,pt,branch Attempting to add event pmu 'cpu' with 'cpu-cycles,' that may result in non-fatal errors After aliases, add event pmu 'cpu' with 'cpu-cycles,' that may result in non-fatal errors Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 833967 cpu -1 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ ... Fixes to make hybrid/BIG.little PMUs behave correctly, ie as core PMUs capable of opening legacy events on each, removing hard coded "cpu_core" and "cpu_atom" Intel PMU names, etc. caused a behavioral difference on Apple/ARM due to latent issues in the PMU driver reported in: https://lore.kernel.org/lkml/08f1f185-e259-4014-9ca4-6411d5c1bc65@marcan.st/ As part of that report Mark Rutland requested that legacy events not be higher in priority when a PMU is specified reversing what has until this change been perf's default behavior. With this change the above becomes: $ perf stat -e cpu-cycles,cpu/cpu-cycles/ true Using CPUID GenuineIntel-6-8D-1 Attempt to add: cpu/cpu-cycles=0/ ..after resolving event: cpu/event=0x3c/ Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid 827628 cpu -1 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ perf_event_attr: type 4 (PERF_TYPE_RAW) size 136 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 enable_on_exec 1 exclude_guest 1 ------------------------------------------------------------ ... So the second event has become a raw event as /sys/devices/cpu/events/cpu-cycles exists. A fix was necessary to config_term_pmu in parse-events.c as check_alias expansion needs to happen after config_term_pmu, and config_term_pmu may need calling a second time because of this. config_term_pmu is updated to not use the legacy event when the PMU has such a named event (either from JSON or sysfs). The bulk of this change is updating all of the parse-events test expectations so that if a sysfs/JSON event exists for a PMU the test doesn't fail - a further sign, if it were needed, that the legacy event priority was a known and tested behavior of the perf tool. Reported-by: Hector Martin Signed-off-by: Ian Rogers Tested-by: Hector Martin Tested-by: Marc Zyngier Acked-by: Mark Rutland Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231123042922.834425-1-irogers@google.com [ Initialize the 'alias_rewrote_terms' variable to false to address a clang warning ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 256 +++++++++++++++++++++++++++++----------- tools/perf/util/parse-events.c | 52 +++++--- tools/perf/util/pmu.c | 8 +- tools/perf/util/pmu.h | 3 +- 4 files changed, 231 insertions(+), 88 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index e52f45c7c3d1..fbdf710d5eea 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -162,6 +162,22 @@ static int test__checkevent_numeric(struct evlist *evlist) return TEST_OK; } + +static int assert_hw(struct perf_evsel *evsel, enum perf_hw_id id, const char *name) +{ + struct perf_pmu *pmu; + + if (evsel->attr.type == PERF_TYPE_HARDWARE) { + TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, id)); + return 0; + } + pmu = perf_pmus__find_by_type(evsel->attr.type); + + TEST_ASSERT_VAL("unexpected PMU type", pmu); + TEST_ASSERT_VAL("PMU missing event", perf_pmu__have_event(pmu, name)); + return 0; +} + static int test__checkevent_symbolic_name(struct evlist *evlist) { struct perf_evsel *evsel; @@ -169,10 +185,12 @@ static int test__checkevent_symbolic_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", - test_perf_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + int ret = assert_hw(evsel, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + + if (ret) + return ret; } + return TEST_OK; } @@ -183,8 +201,10 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 0 != evlist->core.nr_entries); perf_evlist__for_each_evsel(&evlist->core, evsel) { - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); - TEST_ASSERT_VAL("wrong config", test_perf_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + int ret = assert_hw(evsel, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; /* * The period value gets configured within evlist__config, * while this test executes only parse events method. @@ -861,10 +881,14 @@ static int test__group1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions:k */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -878,8 +902,10 @@ static int test__group1(struct evlist *evlist) /* cycles:upp */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -907,6 +933,8 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of groups", 1 == evlist__nr_groups(evlist)); evlist__for_each_entry(evlist, evsel) { + int ret; + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE) { /* faults + :ku modifier */ leader = evsel; @@ -939,8 +967,10 @@ static int test__group2(struct evlist *evlist) continue; } /* cycles:k */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -957,6 +987,7 @@ static int test__group2(struct evlist *evlist) static int test__group3(struct evlist *evlist __maybe_unused) { struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (3 * perf_pmus__num_core_pmus() + 2)); @@ -1045,8 +1076,10 @@ static int test__group3(struct evlist *evlist __maybe_unused) continue; } /* instructions:u */ - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1070,10 +1103,14 @@ static int test__group4(struct evlist *evlist __maybe_unused) num_core_entries() == evlist__nr_groups(evlist)); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:u + p */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1089,8 +1126,10 @@ static int test__group4(struct evlist *evlist __maybe_unused) /* instructions:kp + p */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1108,6 +1147,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) static int test__group5(struct evlist *evlist __maybe_unused) { struct evsel *evsel = NULL, *leader; + int ret; TEST_ASSERT_VAL("wrong number of entries", evlist->core.nr_entries == (5 * num_core_entries())); @@ -1117,8 +1157,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles + G */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1133,8 +1175,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions + G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1148,8 +1192,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles:G */ evsel = leader = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1164,8 +1210,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) /* instructions:G */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1178,8 +1226,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) for (int i = 0; i < num_core_entries(); i++) { /* cycles */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1201,10 +1251,14 @@ static int test__group_gh1(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :H group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1218,8 +1272,10 @@ static int test__group_gh1(struct evlist *evlist) /* cache-misses:G + :H group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1242,10 +1298,14 @@ static int test__group_gh2(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles + :G group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1259,8 +1319,10 @@ static int test__group_gh2(struct evlist *evlist) /* cache-misses:H + :G group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1283,10 +1345,14 @@ static int test__group_gh3(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :u group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1300,8 +1366,10 @@ static int test__group_gh3(struct evlist *evlist) /* cache-misses:H + :u group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1324,10 +1392,14 @@ static int test__group_gh4(struct evlist *evlist) evlist__nr_groups(evlist) == num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles:G + :uG group modifier */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1341,8 +1413,10 @@ static int test__group_gh4(struct evlist *evlist) /* cache-misses:H + :uG group modifier */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1363,10 +1437,14 @@ static int test__leader_sample1(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1379,8 +1457,10 @@ static int test__leader_sample1(struct evlist *evlist) /* cache-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1392,8 +1472,10 @@ static int test__leader_sample1(struct evlist *evlist) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); @@ -1415,10 +1497,14 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) evlist->core.nr_entries == (2 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* instructions - sampling group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_INSTRUCTIONS)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_INSTRUCTIONS, "instructions"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1431,8 +1517,10 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) /* branch-misses - not sampling */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); @@ -1472,10 +1560,14 @@ static int test__pinned_group(struct evlist *evlist) evlist->core.nr_entries == (3 * num_core_entries())); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1484,13 +1576,18 @@ static int test__pinned_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); } return TEST_OK; @@ -1517,10 +1614,14 @@ static int test__exclusive_group(struct evlist *evlist) evlist->core.nr_entries == 3 * num_core_entries()); for (int i = 0; i < num_core_entries(); i++) { + int ret; + /* cycles - group leader */ evsel = leader = (i == 0 ? evlist__first(evlist) : evsel__next(evsel)); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); /* TODO: The group modifier is not copied to the split group leader. */ @@ -1529,13 +1630,18 @@ static int test__exclusive_group(struct evlist *evlist) /* cache-misses - can not be pinned, but will go on with the leader */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CACHE_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_CACHE_MISSES, "cache-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* branch-misses - ditto */ evsel = evsel__next(evsel); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_BRANCH_MISSES)); + ret = assert_hw(&evsel->core, PERF_COUNT_HW_BRANCH_MISSES, "branch-misses"); + if (ret) + return ret; + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); } return TEST_OK; @@ -1677,9 +1783,11 @@ static int test__checkevent_raw_pmu(struct evlist *evlist) static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); return TEST_OK; } @@ -1687,9 +1795,11 @@ static int test__sym_event_slash(struct evlist *evlist) static int test__sym_event_dc(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); return TEST_OK; } @@ -1697,9 +1807,11 @@ static int test__sym_event_dc(struct evlist *evlist) static int test__term_equal_term(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "name") == 0); return TEST_OK; } @@ -1707,9 +1819,11 @@ static int test__term_equal_term(struct evlist *evlist) static int test__term_equal_legacy(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); + int ret = assert_hw(&evsel->core, PERF_COUNT_HW_CPU_CYCLES, "cycles"); + + if (ret) + return ret; - TEST_ASSERT_VAL("wrong type", evsel->core.attr.type == PERF_TYPE_HARDWARE); - TEST_ASSERT_VAL("wrong config", test_config(evsel, PERF_COUNT_HW_CPU_CYCLES)); TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "l1d") == 0); return TEST_OK; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aa2f5c6fc7fc..66eabcea4242 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -976,7 +976,7 @@ static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_error *err) { if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -986,15 +986,23 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { + /* + * Rewrite the PMU event to a legacy cache one unless the PMU + * doesn't support legacy cache events or the event is present + * within the PMU. + */ + if (perf_pmu__supports_legacy_cache(pmu) && + !perf_pmu__have_event(pmu, term->config)) { attr->type = PERF_TYPE_HW_CACHE; return parse_events__decode_legacy_cache(term->config, pmu->type, &attr->config); - } else + } else { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } } if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); if (!pmu) { char *err_str; @@ -1004,10 +1012,19 @@ static int config_term_pmu(struct perf_event_attr *attr, err_str, /*help=*/NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + /* + * If the PMU has a sysfs or json event prefer it over + * legacy. ARM requires this. + */ + if (perf_pmu__have_event(pmu, term->config)) { + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->no_value = true; + } else { + attr->type = PERF_TYPE_HARDWARE; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + } return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1381,6 +1398,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, YYLTYPE *loc = loc_; LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; + bool alias_rewrote_terms = false; pmu = parse_state->fake_pmu ?: perf_pmus__find(name); @@ -1433,7 +1451,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, &info, err)) { + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); + return -EINVAL; + } + + /* Look for event names in the terms and rewrite into format based terms. */ + if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1447,11 +1473,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + /* Configure attr/terms again if an alias was expanded. */ + if (alias_rewrote_terms && + config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d3c9aa4326be..3c9609944a2f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1494,12 +1494,14 @@ static int check_info_data(struct perf_pmu *pmu, * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err) + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1521,7 +1523,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } - + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; @@ -1615,6 +1617,8 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { + if (!name) + return false; if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index d2895d415f08..424c3fee0949 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -201,7 +201,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, - struct perf_pmu_info *info, struct parse_events_error *err); + struct perf_pmu_info *info, bool *rewrote_terms, + struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load); -- cgit v1.2.3 From 4a18ab467820b75436ea9ddd42ee7cb10efa491c Mon Sep 17 00:00:00 2001 From: zhaimingbing Date: Fri, 24 Nov 2023 17:26:57 +0800 Subject: perf lock: Fix a memory leak on an error path if a strdup-ed string is NULL,the allocated memory needs freeing. Signed-off-by: zhaimingbing Acked-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Li Dong Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sean Christopherson Link: https://lore.kernel.org/r/20231124092657.10392-1-zhaimingbing@cmss.chinamobile.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a3ff2f4edbaa..230461280e45 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -2285,8 +2285,10 @@ setup_args: else ev_name = strdup(contention_tracepoints[j].name); - if (!ev_name) + if (!ev_name) { + free(rec_argv); return -ENOMEM; + } rec_argv[i++] = "-e"; rec_argv[i++] = ev_name; -- cgit v1.2.3 From b16904fd9f01b580db357ef2b1cc9e86d89576c2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 26 Nov 2023 21:03:42 -0800 Subject: bpf: Fix a few selftest failures due to llvm18 change With latest upstream llvm18, the following test cases failed: $ ./test_progs -j #13/2 bpf_cookie/multi_kprobe_link_api:FAIL #13/3 bpf_cookie/multi_kprobe_attach_api:FAIL #13 bpf_cookie:FAIL #77 fentry_fexit:FAIL #78/1 fentry_test/fentry:FAIL #78 fentry_test:FAIL #82/1 fexit_test/fexit:FAIL #82 fexit_test:FAIL #112/1 kprobe_multi_test/skel_api:FAIL #112/2 kprobe_multi_test/link_api_addrs:FAIL [...] #112 kprobe_multi_test:FAIL #356/17 test_global_funcs/global_func17:FAIL #356 test_global_funcs:FAIL Further analysis shows llvm upstream patch [1] is responsible for the above failures. For example, for function bpf_fentry_test7() in net/bpf/test_run.c, without [1], the asm code is: 0000000000000400 : 400: f3 0f 1e fa endbr64 404: e8 00 00 00 00 callq 0x409 409: 48 89 f8 movq %rdi, %rax 40c: c3 retq 40d: 0f 1f 00 nopl (%rax) ... and with [1], the asm code is: 0000000000005d20 : 5d20: e8 00 00 00 00 callq 0x5d25 5d25: c3 retq ... and is called instead of and this caused test failures for #13/#77 etc. except #356. For test case #356/17, with [1] (progs/test_global_func17.c)), the main prog looks like: 0000000000000000 : 0: b4 00 00 00 2a 00 00 00 w0 = 0x2a 1: 95 00 00 00 00 00 00 00 exit ... which passed verification while the test itself expects a verification failure. Let us add 'barrier_var' style asm code in both places to prevent function specialization which caused selftests failure. [1] https://github.com/llvm/llvm-project/pull/72903 Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231127050342.1945270-1-yonghong.song@linux.dev --- net/bpf/test_run.c | 2 +- tools/testing/selftests/bpf/progs/test_global_func17.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9fdcc5cdce1..711cf5d59816 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -542,7 +542,7 @@ struct bpf_fentry_test_t { int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) { - asm volatile (""); + asm volatile ("": "+r"(arg)); return (long)arg; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c index a32e11c7d933..5de44b09e8ec 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func17.c +++ b/tools/testing/selftests/bpf/progs/test_global_func17.c @@ -5,6 +5,7 @@ __noinline int foo(int *p) { + barrier_var(p); return p ? (*p = 42) : 0; } -- cgit v1.2.3 From 581ff5b66c94a8133d1c77ed42334623fadc968c Mon Sep 17 00:00:00 2001 From: zhujun2 Date: Tue, 14 Nov 2023 22:42:55 -0800 Subject: perf tests coresight: Remove unused variables These variables are never referenced in the code, just remove them. Reviewed-by: James Clark Signed-off-by: zhujun2 Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Link: https://lore.kernel.org/r/20231115064255.11057-1-zhujun2@cmss.chinamobile.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c | 1 - tools/perf/tests/shell/coresight/thread_loop/thread_loop.c | 1 - tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c | 1 - 3 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c index a7e169d1bf64..5f886cd09e6b 100644 --- a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c +++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c @@ -42,7 +42,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned long i, len, size, thr; - pthread_t threads[256]; struct args args[256]; long long v; diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c index c0158fac7d0b..e05a559253ca 100644 --- a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c +++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c @@ -57,7 +57,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, len, thr; - pthread_t threads[256]; struct args args[256]; if (argc < 3) { diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c index 8f6d384208ed..0fc7bf1a25af 100644 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c @@ -51,7 +51,6 @@ static pthread_t new_thr(void *(*fn) (void *arg), void *arg) int main(int argc, char **argv) { unsigned int i, thr; - pthread_t threads[256]; struct args args[256]; if (argc < 2) { -- cgit v1.2.3 From 5ebe2f4bf0a8fe8cceb5664a7dea4c17e2cf8477 Mon Sep 17 00:00:00 2001 From: Ji Sheng Teoh Date: Wed, 22 Nov 2023 11:09:08 +0800 Subject: perf vendor events riscv: Add StarFive Dubhe-90 JSON file Similar to StarFive's Dubhe-80, Dubhe-90 supports raw event id 0x00 - 0x22. Reuse Dubhe-80 firmware and common json file. The raw events are enabled through PMU node of DT binding. Besides raw event, add standard RISC-V firmware events to support monitoring of firmware event. Example of PMU DT node: pmu { compatible = "riscv,pmu"; riscv,raw-event-to-mhpmcounters = /* Event ID 1-31 */ <0x00 0x00 0xFFFFFFFF 0xFFFFFFE0 0x00007FF8>, /* Event ID 32-33 */ <0x00 0x20 0xFFFFFFFF 0xFFFFFFFE 0x00007FF8>, /* Event ID 34 */ <0x00 0x22 0xFFFFFFFF 0xFFFFFF22 0x00007FF8>; }; 'perf stat' output: [root@user]# perf stat -a \ -e access_mmu_stlb \ -e miss_mmu_stlb \ -e access_mmu_pte_c \ -e rob_flush \ -e btb_prediction_miss \ -e itlb_miss \ -e sync_del_fetch_g \ -e icache_miss \ -e bpu_br_retire \ -e bpu_br_miss \ -e ret_ins_retire \ -e ret_ins_miss \ -- openssl speed rsa2048 Doing 2048 bits private rsa's for 10s: 39 2048 bits private RSA's in 10.03s Doing 2048 bits public rsa's for 10s: 1469 2048 bits public RSA's in 9.47s version: 3.0.10 built on: Tue Aug 1 13:47:24 2023 UTC options: bn(64,64) CPUINFO: N/A sign verify sign/s verify/s rsa 2048 bits 0.257179s 0.006447s 3.9 155.1 Performance counter stats for 'system wide': 3112882 access_mmu_stlb 10550 miss_mmu_stlb 18251 access_mmu_pte_c 274765 rob_flush 22470560 btb_prediction_miss 3035839 itlb_miss 643549060 sync_del_fetch_g 133013 icache_miss 62982796 bpu_br_retire 287548 bpu_br_miss 8935910 ret_ins_retire 8308 ret_ins_miss 20.656182600 seconds time elapsed Reviewed-by: Ley Foon Tan Signed-off-by: Ji Sheng Teoh Cc: Adrian Hunter Cc: Albert Ou Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nikita Shubin Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/20231122030908.2981502-1-jisheng.teoh@starfivetech.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/riscv/mapfile.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv index ee61e26f90cd..56b03138d46a 100644 --- a/tools/perf/pmu-events/arch/riscv/mapfile.csv +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -15,4 +15,4 @@ # #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core -0x67e-0x80000000db000080-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core +0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core -- cgit v1.2.3 From 1638b11ef8156c8551f5aaa5799069633593c5fe Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 23 Nov 2023 21:32:32 +0530 Subject: perf tools: Add perf binary dependent rule for shellcheck log in Makefile.perf Add rule in new Makefile "tests/Makefile.tests" for running shellcheck on shell test scripts. This automates below shellcheck into the build. $ for F in $(find tests/shell/ -perm -o=x -name '*.sh'); do shellcheck -S warning $F; done Condition for shellcheck is added in Makefile.perf to avoid build breakage in the absence of shellcheck binary. Update Makefile.perf to contain new rule for "SHELLCHECK_TEST" which is for making shellcheck test as a dependency on perf binary. Added "tests/Makefile.tests" to run shellcheck on shellscripts in tests/shell. The make rule "SHLLCHECK_RUN" ensures that, every time during make, shellcheck will be run only on modified files during subsequent invocations. By this, if any newly added shell scripts or fixes in existing scripts breaks coding/formatting style, it will get captured during the perf build. Example build failure by modifying probe_vfs_getname.sh in tests/shell: In tests/shell/probe_vfs_getname.sh line 8: . $(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. For more information: https://www.shellcheck.net/wiki/SC2046 -- Quote this to prevent word splitt... make[3]: *** [/root/athira/perf-tools-next/tools/perf/tests/Makefile.tests:18: tests/shell/.probe_vfs_getname.sh.shellcheck_log] Error 1 make[2]: *** [Makefile.perf:686: SHELLCHECK_TEST] Error 2 make[2]: *** Waiting for unfinished jobs.... make[1]: *** [Makefile.perf:244: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 Here, like other files which gets created during compilation (ex: .builtin-bench.o.cmd or .perf.o.cmd ), create .shellcheck_log also as a hidden file. Example: tests/shell/.probe_vfs_getname.sh.shellcheck_log shellcheck is re-run if any of the script gets modified based on its dependency of this log file. After this, for testing, changed "tests/shell/trace+probe_vfs_getname.sh" to break shellcheck format. In the next make run, it is also captured: In tests/shell/probe_vfs_getname.sh line 8: . $(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. For more information: https://www.shellcheck.net/wiki/SC2046 -- Quote this to prevent word splitt... make[3]: *** [/root/athira/perf-tools-next/tools/perf/tests/Makefile.tests:18: tests/shell/.probe_vfs_getname.sh.shellcheck_log] Error 1 make[3]: *** Waiting for unfinished jobs.... In tests/shell/trace+probe_vfs_getname.sh line 14: . $(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. For more information: https://www.shellcheck.net/wiki/SC2046 -- Quote this to prevent word splitt... make[3]: *** [/root/athira/perf-tools-next/tools/perf/tests/Makefile.tests:18: tests/shell/.trace+probe_vfs_getname.sh.shellcheck_log] Error 1 make[2]: *** [Makefile.perf:686: SHELLCHECK_TEST] Error 2 make[2]: *** Waiting for unfinished jobs.... make[1]: *** [Makefile.perf:244: sub-make] Error 2 make: *** [Makefile:70: all] Error 2 Failure log can be found in the stdout of make itself. This is reported at build time. To be able to go ahead with the build or disable shellcheck even though it is known that some test is broken, add a "NO_SHELLCHECK" option. Example: make NO_SHELLCHECK=1 INSTALL libsubcmd_headers INSTALL libsymbol_headers INSTALL libapi_headers INSTALL libperf_headers INSTALL libbpf_headers LINK perf Note: This is tested on RHEL and also SLES. Use below check: "$(shell which shellcheck 2> /dev/null)" to look for presence of shellcheck binary. The approach "shell command -v" is not used here. In some of the distros(RHEL), command is available as executable file (/usr/bin/command). But in some distros(SLES), it is a shell builtin and not available as executable file. Committer testing: $ type shellcheck shellcheck is hashed (/usr/bin/shellcheck) $ rpm -qf /usr/bin/shellcheck ShellCheck-0.9.0-2.fc38.x86_64 $ $ alias m $ git diff diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 554e12e83c55fd56..dbc14634678e2bf6 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -5,7 +5,7 @@ # Arnaldo Carvalho de Melo , 2017 # shellcheck source=lib/probe.sh -. "$(dirname $0)"/lib/probe.sh +. $(dirname $0)/lib/probe.sh skip_if_no_perf_probe || exit 2 alias m='rm -rf ~/libexec/perf-core/ ; make -k CORESIGHT=1 O=/tmp/build/$(basename $PWD) -C tools/perf install-bin && perf test python' $ m make: Entering directory '/home/acme/git/perf-tools-next/tools/perf' BUILD: Doing 'make -j32' parallel build INSTALL libbpf_headers In tests/shell/probe_vfs_getname.sh line 8: . $(dirname $0)/lib/probe.sh ^-----------^ SC2046 (warning): Quote this to prevent word splitting. For more information: https://www.shellcheck.net/wiki/SC2046 -- Quote this to prevent word splitt... make[3]: *** [/home/acme/git/perf-tools-next/tools/perf/tests/Makefile.tests:18: tests/shell/.probe_vfs_getname.sh.shellcheck_log] Error 1 make[2]: *** [Makefile.perf:686: SHELLCHECK_TEST] Error 2 make[2]: *** Waiting for unfinished jobs.... make[1]: *** [Makefile.perf:244: sub-make] Error 2 make: *** [Makefile:113: install-bin] Error 2 make: Leaving directory '/home/acme/git/perf-tools-next/tools/perf' $ Reviewed-by: James Clark Signed-off-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Disha Goel Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20231123160232.94253-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 21 ++++++++++++++++++++- tools/perf/tests/Makefile.tests | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tools/perf/tests/Makefile.tests (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d80dcaa5a1e3..824cbc0af7d7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -134,6 +134,8 @@ include ../scripts/utilities.mak # x86 instruction decoder - new instructions test # # Define GEN_VMLINUX_H to generate vmlinux.h from the BTF. +# +# Define NO_SHELLCHECK if you do not want to run shellcheck during build # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -671,7 +673,23 @@ $(PERF_IN): prepare FORCE $(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) +# Runs shellcheck on perf test shell scripts + +SHELLCHECK := $(shell which shellcheck 2> /dev/null) + +ifeq ($(NO_SHELLCHECK),1) +SHELLCHECK := +endif + +ifneq ($(SHELLCHECK),) +SHELLCHECK_TEST: FORCE prepare + $(Q)$(MAKE) -f $(srctree)/tools/perf/tests/Makefile.tests +else +SHELLCHECK_TEST: + @: +endif + +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) SHELLCHECK_TEST $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ @@ -1134,6 +1152,7 @@ bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean + $(Q)$(MAKE) -f $(srctree)/tools/perf/tests/Makefile.tests clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/tests/Makefile.tests b/tools/perf/tests/Makefile.tests new file mode 100644 index 000000000000..fdaca5f7a946 --- /dev/null +++ b/tools/perf/tests/Makefile.tests @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# Athira Rajeev , 2023 + +PROGS := $(shell find tests/shell -perm -o=x -type f -name '*.sh') +FILE_NAME := $(notdir $(PROGS)) +FILE_NAME := $(FILE_NAME:%=.%) +LOGS := $(join $(dir $(PROGS)),$(FILE_NAME)) +LOGS := $(LOGS:%=%.shellcheck_log) + +.PHONY: all +all: SHELLCHECK_RUN + @: + +SHELLCHECK_RUN: $(LOGS) + +.%.shellcheck_log: % + $(call rule_mkdir) + $(Q)$(call frecho-cmd,test)@shellcheck -S warning "$<" > $@ || (cat $@ && rm $@ && false) + +clean: + $(eval log_files := $(shell find . -name '.*.shellcheck_log')) + @rm -rf $(log_files) -- cgit v1.2.3 From 72324ac52ddddb168d8008e36d6a617b9b74f6c1 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 27 Nov 2023 11:17:36 +0000 Subject: KVM: selftests: arm64: Determine max ipa size per-page size We are about to add 52 bit PA guest modes for 4K and 16K pages when the system supports LPA2. In preparation beef up the logic that parses mmfr0 to also tell us what the maximum supported PA size is for each page size. Max PA size = 0 implies the page size is not supported at all. Reviewed-by: Oliver Upton Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231127111737.1897081-12-ryan.roberts@arm.com --- .../selftests/kvm/include/aarch64/processor.h | 4 +- tools/testing/selftests/kvm/include/guest_modes.h | 4 +- .../testing/selftests/kvm/lib/aarch64/processor.c | 30 +++++++++++--- tools/testing/selftests/kvm/lib/guest_modes.c | 48 ++++++++++------------ 4 files changed, 50 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index c42d683102c7..cf20e44e86f2 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -119,8 +119,8 @@ enum { /* Access flag update enable/disable */ #define TCR_EL1_HA (1ULL << 39) -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k); +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k); void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h index b691df33e64e..63f5167397cc 100644 --- a/tools/testing/selftests/kvm/include/guest_modes.h +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -11,8 +11,8 @@ struct guest_mode { extern struct guest_mode guest_modes[NUM_VM_MODES]; -#define guest_mode_append(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +#define guest_mode_append(mode, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \ }) void guest_modes_append_default(void); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6fe12e985ba5..e6ffd9037c37 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -492,12 +492,24 @@ uint32_t guest_get_vcpuid(void) return read_sysreg(tpidr_el1); } -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k) +static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, + uint32_t not_sup_val, uint32_t ipa52_min_val) +{ + if (gran == not_sup_val) + return 0; + else if (gran >= ipa52_min_val && vm_ipa >= 52) + return 52; + else + return min(vm_ipa, 48U); +} + +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; uint64_t val; + uint32_t gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), .addr = (uint64_t)&val, @@ -518,9 +530,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf; - *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0; - *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0; + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, + ID_AA64MMFR0_EL1_TGRAN4_52_BIT); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, + ID_AA64MMFR0_EL1_TGRAN64_IMP); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, + ID_AA64MMFR0_EL1_TGRAN16_52_BIT); close(vcpu_fd); close(vm_fd); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 1df3ce4b16fd..772a7dd15db4 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -14,37 +14,31 @@ struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { #ifndef __aarch64__ - guest_mode_append(VM_MODE_DEFAULT, true, true); + guest_mode_append(VM_MODE_DEFAULT, true); #else { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - bool ps4k, ps16k, ps64k; + uint32_t ipa4k, ipa16k, ipa64k; int i; - aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); + aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); - vm_mode_default = NUM_VM_MODES; + guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52); - if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); - if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); - } - if (limit >= 40) { - guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); - if (ps4k) - vm_mode_default = VM_MODE_P40V48_4K; - } - if (limit >= 36) { - guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); - guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); - } + guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48); + guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48); + guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48); + + guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40); + guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40); + guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40); + + guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36); + guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36); + guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36); + guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36); + + vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES; /* * Pick the first supported IPA size if the default @@ -72,7 +66,7 @@ void guest_modes_append_default(void) close(kvm_fd); /* Starting with z13 we have 47bits of physical address */ if (info.ibc >= 0x30) - guest_mode_append(VM_MODE_P47V64_4K, true, true); + guest_mode_append(VM_MODE_P47V64_4K, true); } #endif #ifdef __riscv @@ -80,9 +74,9 @@ void guest_modes_append_default(void) unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true, true); + guest_mode_append(VM_MODE_P52V48_4K, true); if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_4K, true); } #endif } -- cgit v1.2.3 From 10a0cc3b688fcf753ff3f6518bb15e7a6809e908 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Mon, 27 Nov 2023 11:17:37 +0000 Subject: KVM: selftests: arm64: Support P52V48 4K and 16K guest_modes Add support for VM_MODE_P52V48_4K and VM_MODE_P52V48_16K guest modes by using the FEAT_LPA2 pte format for stage1, when FEAT_LPA2 is available. Reviewed-by: Oliver Upton Signed-off-by: Ryan Roberts Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231127111737.1897081-13-ryan.roberts@arm.com --- .../testing/selftests/kvm/include/kvm_util_base.h | 1 + .../testing/selftests/kvm/lib/aarch64/processor.c | 39 +++++++++++++++++----- tools/testing/selftests/kvm/lib/guest_modes.c | 2 ++ tools/testing/selftests/kvm/lib/kvm_util.c | 3 ++ 4 files changed, 36 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a18db6a7b3cf..406500fb6e28 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -171,6 +171,7 @@ static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, enum vm_guest_mode { VM_MODE_P52V48_4K, + VM_MODE_P52V48_16K, VM_MODE_P52V48_64K, VM_MODE_P48V48_4K, VM_MODE_P48V48_16K, diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index e6ffd9037c37..41c776b642c0 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include +#include #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -58,13 +59,25 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> vm->page_shift) & mask; } +static inline bool use_lpa2_pte_format(struct kvm_vm *vm) +{ + return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) && + (vm->pa_bits > 48 || vm->va_bits > 48); +} + static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) { uint64_t pte; - pte = pa & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + if (use_lpa2_pte_format(vm)) { + pte = pa & GENMASK(49, vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; + attrs &= ~GENMASK(9, 8); + } else { + pte = pa & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + } pte |= attrs; return pte; @@ -74,9 +87,14 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) { uint64_t pa; - pa = pte & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + if (use_lpa2_pte_format(vm)) { + pa = pte & GENMASK(49, vm->page_shift); + pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + } else { + pa = pte & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + } return pa; } @@ -266,9 +284,6 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_P52V48_4K: - TEST_FAIL("AArch64 does not support 4K sized pages " - "with 52-bit physical address ranges"); case VM_MODE_PXXV48_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); @@ -278,12 +293,14 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P36V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ break; + case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: @@ -297,6 +314,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure output size */ switch (vm->mode) { + case VM_MODE_P52V48_4K: + case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; @@ -325,6 +344,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + if (use_lpa2_pte_format(vm)) + tcr_el1 |= (1ul << 59) /* DS */; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 772a7dd15db4..b04901e55138 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -23,6 +23,8 @@ void guest_modes_append_default(void) aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); + guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52); + guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52); guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52); guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7a8af1821f5d..aeba7a23105c 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -148,6 +148,7 @@ const char *vm_guest_mode_string(uint32_t i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", + [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", @@ -173,6 +174,7 @@ const char *vm_guest_mode_string(uint32_t i) const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 }, [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, @@ -251,6 +253,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) case VM_MODE_P36V48_64K: vm->pgtable_levels = 3; break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: -- cgit v1.2.3 From 2112aa034907c428785e1a5730927181276ee45b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 17 Nov 2023 13:05:55 +0100 Subject: ALSA: pcm: Introduce MSBITS subformat interface Improve granularity of format selection for S32/U32 formats by adding constants representing 20, 24 and MAX most significant bits. The MAX means the maximum number of significant bits which can the physical format hold. For 32-bit formats, MAX is related to 32 bits. For 8-bit formats, MAX is related to 8 bits etc. As there is only one user currently (format S32_LE), subformat is represented by a simple u32 and stores flags only for that one user alone. The approach of subformat being part of struct snd_pcm_hardware is a compromise between ALSA and ASoC allowing for hw_params-intersection code to be alloc/free-less while not adding any new responsibilities to ASoC runtime structures. Acked-by: Mark Brown Signed-off-by: Jaroslav Kysela Co-developed-by: Cezary Rojewski Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20231117120610.1755254-2-cezary.rojewski@intel.com Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 7 +++++ include/uapi/sound/asound.h | 7 +++-- sound/core/pcm.c | 3 +++ sound/core/pcm_native.c | 55 +++++++++++++++++++++++++++++++++++++-- tools/include/uapi/sound/asound.h | 7 +++-- 5 files changed, 73 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 2a815373dac1..cc175c623dac 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -32,6 +32,7 @@ struct snd_pcm_hardware { unsigned int info; /* SNDRV_PCM_INFO_* */ u64 formats; /* SNDRV_PCM_FMTBIT_* */ + u32 subformats; /* for S32_LE, SNDRV_PCM_SUBFMTBIT_* */ unsigned int rates; /* SNDRV_PCM_RATE_* */ unsigned int rate_min; /* min rate */ unsigned int rate_max; /* max rate */ @@ -217,6 +218,12 @@ struct snd_pcm_ops { #define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_BE #endif +#define _SNDRV_PCM_SUBFMTBIT(fmt) BIT((__force int)SNDRV_PCM_SUBFORMAT_##fmt) +#define SNDRV_PCM_SUBFMTBIT_STD _SNDRV_PCM_SUBFMTBIT(STD) +#define SNDRV_PCM_SUBFMTBIT_MSBITS_MAX _SNDRV_PCM_SUBFMTBIT(MSBITS_MAX) +#define SNDRV_PCM_SUBFMTBIT_MSBITS_20 _SNDRV_PCM_SUBFMTBIT(MSBITS_20) +#define SNDRV_PCM_SUBFMTBIT_MSBITS_24 _SNDRV_PCM_SUBFMTBIT(MSBITS_24) + struct snd_pcm_file { struct snd_pcm_substream *substream; int no_compat_mmap; diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index f9939da41122..d5b9cfbd9cea 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -267,7 +267,10 @@ typedef int __bitwise snd_pcm_format_t; typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0) -#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD +#define SNDRV_PCM_SUBFORMAT_MSBITS_MAX ((__force snd_pcm_subformat_t) 1) +#define SNDRV_PCM_SUBFORMAT_MSBITS_20 ((__force snd_pcm_subformat_t) 2) +#define SNDRV_PCM_SUBFORMAT_MSBITS_24 ((__force snd_pcm_subformat_t) 3) +#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_MSBITS_24 #define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */ #define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */ diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 20bb2d7c8d4b..c4bc15f048b6 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -265,6 +265,9 @@ static const char * const snd_pcm_access_names[] = { static const char * const snd_pcm_subformat_names[] = { SUBFORMAT(STD), + SUBFORMAT(MSBITS_MAX), + SUBFORMAT(MSBITS_20), + SUBFORMAT(MSBITS_24), }; static const char * const snd_pcm_tstamp_mode_names[] = { diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index f610b08f5a2b..f5ff00f99788 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -479,6 +479,7 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream, { const struct snd_interval *i; const struct snd_mask *m; + struct snd_mask *m_rw; int err; if (!params->msbits) { @@ -487,6 +488,22 @@ static int fixup_unreferenced_params(struct snd_pcm_substream *substream, params->msbits = snd_interval_value(i); } + if (params->msbits) { + m = hw_param_mask_c(params, SNDRV_PCM_HW_PARAM_FORMAT); + if (snd_mask_single(m)) { + snd_pcm_format_t format = (__force snd_pcm_format_t)snd_mask_min(m); + + if (snd_pcm_format_linear(format) && + snd_pcm_format_width(format) != params->msbits) { + m_rw = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); + snd_mask_reset(m_rw, + (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); + if (snd_mask_empty(m_rw)) + return -EINVAL; + } + } + } + if (!params->rate_den) { i = hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE); if (snd_interval_single(i)) { @@ -2483,6 +2500,41 @@ static int snd_pcm_hw_rule_buffer_bytes_max(struct snd_pcm_hw_params *params, return snd_interval_refine(hw_param_interval(params, rule->var), &t); } +static int snd_pcm_hw_rule_subformats(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_mask *sfmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_SUBFORMAT); + struct snd_mask *fmask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + u32 *subformats = rule->private; + snd_pcm_format_t f; + struct snd_mask m; + + snd_mask_none(&m); + /* All PCMs support at least the default STD subformat. */ + snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_STD); + + pcm_for_each_format(f) { + if (!snd_mask_test(fmask, (__force unsigned)f)) + continue; + + if (f == SNDRV_PCM_FORMAT_S32_LE && *subformats) + m.bits[0] |= *subformats; + else if (snd_pcm_format_linear(f)) + snd_mask_set(&m, (__force unsigned)SNDRV_PCM_SUBFORMAT_MSBITS_MAX); + } + + return snd_mask_refine(sfmask, &m); +} + +static int snd_pcm_hw_constraint_subformats(struct snd_pcm_runtime *runtime, + unsigned int cond, u32 *subformats) +{ + return snd_pcm_hw_rule_add(runtime, cond, -1, + snd_pcm_hw_rule_subformats, (void *)subformats, + SNDRV_PCM_HW_PARAM_SUBFORMAT, + SNDRV_PCM_HW_PARAM_FORMAT, -1); +} + static int snd_pcm_hw_constraints_init(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -2634,8 +2686,7 @@ static int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream) if (err < 0) return err; - err = snd_pcm_hw_constraint_mask(runtime, SNDRV_PCM_HW_PARAM_SUBFORMAT, - PARAM_MASK_BIT(SNDRV_PCM_SUBFORMAT_STD)); + err = snd_pcm_hw_constraint_subformats(runtime, 0, &hw->subformats); if (err < 0) return err; diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index f9939da41122..d5b9cfbd9cea 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 16) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -267,7 +267,10 @@ typedef int __bitwise snd_pcm_format_t; typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0) -#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD +#define SNDRV_PCM_SUBFORMAT_MSBITS_MAX ((__force snd_pcm_subformat_t) 1) +#define SNDRV_PCM_SUBFORMAT_MSBITS_20 ((__force snd_pcm_subformat_t) 2) +#define SNDRV_PCM_SUBFORMAT_MSBITS_24 ((__force snd_pcm_subformat_t) 3) +#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_MSBITS_24 #define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */ #define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */ -- cgit v1.2.3 From 76cab6f4fd63129e852eb15e14d6d359b57e797f Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 27 Nov 2023 12:00:26 +0800 Subject: ndtest: fix typo class_regster -> class_register Fixes: dd6cad2dcb58 ("testing: nvdimm: make struct class structures constant") Signed-off-by: Yi Zhang Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/20231127040026.362729-1-yi.zhang@redhat.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/nvdimm/test/ndtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index fd26189d53be..b8419f460368 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -924,7 +924,7 @@ static __init int ndtest_init(void) nfit_test_setup(ndtest_resource_lookup, NULL); - rc = class_regster(&ndtest_dimm_class); + rc = class_register(&ndtest_dimm_class); if (rc) goto err_register; -- cgit v1.2.3 From 8aa1e6e29a21f6bb99dcaa64d11e97a21f0f9dc1 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 22 Nov 2023 10:27:03 +0100 Subject: perf report: Remove warning on missing raw data for s390 Command # ./perf report -i /tmp/111 -D > /dev/null emits an error message when a sample for event CRYPTO_ALL in the perf.data file does not contain any raw data. This is ok. Do not trigger this warning when the sample in the perf.data files does not contain any raw data at all. Check for availability of raw data for all events and return if none is available. Output before: # ./perf report -i /tmp/111 -D > /dev/null Invalid CRYPTO_ALL raw data encountered Invalid CRYPTO_ALL raw data encountered Invalid CRYPTO_ALL raw data encountered # Output after: # ./perf report -i /tmp/111 -D > /dev/null # Fixes: b539deafbadb2fc6 ("perf report: Add s390 raw data interpretation for PAI counters") Signed-off-by: Thomas Richter Acked-by: Namhyung Kim Acked-by: Sumanth Korikkar Cc: Heiko Carstens Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20231122092703.3163191-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/s390-sample-raw.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 29a744eeb71e..53383e97ec9d 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -51,8 +51,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample) struct cf_trailer_entry *te; struct cf_ctrset_entry *cep, ce; - if (!len) - return false; while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); ce.def = be16_to_cpu(cep->def); @@ -234,10 +232,9 @@ struct pai_data { /* Event number and value */ */ static bool s390_pai_all_test(struct perf_sample *sample) { - unsigned char *buf = sample->raw_data; size_t len = sample->raw_size; - if (len < 0xa || !buf) + if (len < 0xa) return false; return true; } @@ -299,6 +296,10 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, if (!evsel) return; + /* Check for raw data in sample */ + if (!sample->raw_size || !sample->raw_data) + return; + /* Display raw data on screen */ if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { if (!evsel->pmu) -- cgit v1.2.3 From 70df07838fc1c0acfab3325ae79014e241a88bdf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:41 +0200 Subject: perf header: Fix segfault on build_mem_topology() error path Do not increase the node count unless a node has been successfully read, because it can lead to a segfault if an error occurs. For example, if perf exceeds the open file limit in memory_node__read(), which, on a test system, could be made to happen by setting the file limit to exactly 32: Before: $ ulimit -n 32 $ perf mem record --all-user -- sleep 1 [ perf record: Woken up 1 times to write data ] failed: can't open memory sysfs data perf: Segmentation fault Obtained 14 stack frames. perf(sighandler_dump_stack+0x48) [0x55f4b1f59558] /lib/x86_64-linux-gnu/libc.so.6(+0x42520) [0x7f4ba1c42520] /lib/x86_64-linux-gnu/libc.so.6(free+0x1e) [0x7f4ba1ca53fe] perf(+0x178ff4) [0x55f4b1f48ff4] perf(+0x179a70) [0x55f4b1f49a70] perf(+0x17ef5d) [0x55f4b1f4ef5d] perf(+0x85c0b) [0x55f4b1e55c0b] perf(cmd_record+0xe1d) [0x55f4b1e5920d] perf(cmd_mem+0xc96) [0x55f4b1e80e56] perf(+0x130460) [0x55f4b1f00460] perf(main+0x689) [0x55f4b1e427d9] /lib/x86_64-linux-gnu/libc.so.6(+0x29d90) [0x7f4ba1c29d90] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x80) [0x7f4ba1c29e40] perf(_start+0x25) [0x55f4b1e42a25] Segmentation fault (core dumped) $ After: $ ulimit -n 32 $ perf mem record --all-user -- sleep 1 [ perf record: Woken up 1 times to write data ] failed: can't open memory sysfs data [ perf record: Captured and wrote 0.005 MB perf.data (11 samples) ] $ Fixes: f8e502b9d1b3b197 ("perf header: Ensure bitmaps are freed") Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1c687b5789c0..08cc2febabde 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1444,7 +1444,9 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) nodes = new_nodes; size += 4; } - ret = memory_node__read(&nodes[cnt++], idx); + ret = memory_node__read(&nodes[cnt], idx); + if (!ret) + cnt += 1; } out: closedir(dir); -- cgit v1.2.3 From 96ba5999e8d86138cea90422f8c00309a7eedd3b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:42 +0200 Subject: perf tests lib: Add perf_has_symbol.sh Some shell tests depend on finding symbols for perf itself, and fail if perf has been stripped and no debug object is available. Add helper functions to check if perf has a needed symbol. This is preparation for amending the tests themselves to be skipped if a needed symbol is not found. The functions make use of the "Symbols" test which reads and checks symbols from a dso, perf itself by default. Note the "Symbols" test will find symbols using the same method as other perf tests, including, for example, looking in the buildid cache. An alternative would be to prevent the needed symbols from being stripped, which seems to work with gcc's externally_visible attribute, but that attribute is not supported by clang. Another alternative would be to use option -Wl,-E (which is already used when perf is built with perl support) which causes the linker to add all (global) symbols to the dynamic symbol table. Then the required symbols need only be made global in scope to avoid being strippable. However that goes beyond what is needed. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/perf_has_symbol.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tools/perf/tests/shell/lib/perf_has_symbol.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/lib/perf_has_symbol.sh b/tools/perf/tests/shell/lib/perf_has_symbol.sh new file mode 100644 index 000000000000..5d59c32ae3e7 --- /dev/null +++ b/tools/perf/tests/shell/lib/perf_has_symbol.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +perf_has_symbol() +{ + if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then + echo "perf does have symbol '$1'" + return 0 + fi + echo "perf does not have symbol '$1'" + return 1 +} + +skip_test_missing_symbol() +{ + if ! perf_has_symbol "$1" ; then + echo "perf is missing symbols - skipping test" + exit 2 + fi + return 0 +} -- cgit v1.2.3 From c9526a735082bba57da322332cbcef1bbdff5698 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:43 +0200 Subject: perf tests: Skip pipe test if noploop symbol is missing perf pipe recording and injection test depends on finding symbol noploop in perf, and fails if perf has been stripped and no debug object is available. In that case, skip the test instead. Example: Before: $ strip tools/perf/perf $ tools/perf/perf buildid-cache -p `realpath tools/perf/perf` $ tools/perf/perf test -v pipe 86: perf pipe recording and injection test : --- start --- test child forked, pid 47734 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 47741 47741 -1 |perf [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] cannot find noploop function in pipe #1 test child finished with -1 ---- end ---- perf pipe recording and injection test: FAILED! After: $ tools/perf/perf test -v pipe 86: perf pipe recording and injection test : --- start --- test child forked, pid 48996 perf does not have symbol 'noploop' perf is missing symbols - skipping test test child finished with -2 ---- end ---- perf pipe recording and injection test: Skip Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/pipe_test.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index 8dd115dd35a7..a78d35d2cff0 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -2,10 +2,17 @@ # perf pipe recording and injection test # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + data=$(mktemp /tmp/perf.data.XXXXXX) prog="perf test -w noploop" task="perf" -sym="noploop" if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then echo "cannot find the test file in the perf report" -- cgit v1.2.3 From 3c489dbe69c155c86c4460491d11520cf8ec3637 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:44 +0200 Subject: perf tests: Skip record test if test_loop symbol is missing perf record test depends on finding symbol test_loop in perf, and fails if perf has been stripped and no debug object is available. In that case, skip the test instead. Example: Note, building with perl support adds option -Wl,-E which causes the linker to add all (global) symbols to the dynamic symbol table. So the test_loop symbol, being global, does not get stripped unless NO_LIBPERL=1 Before: $ make NO_LIBPERL=1 -C tools/perf >/dev/null 2>&1 $ strip tools/perf/perf $ tools/perf/perf buildid-cache -p `realpath tools/perf/perf` $ tools/perf/perf test -v 'record tests' 91: perf record tests : --- start --- test child forked, pid 118750 Basic --per-thread mode test Per-thread record [Failed missing output] Register capture test Register capture test [Success] Basic --system-wide mode test System-wide record [Skipped not supported] Basic target workload test Workload record [Failed missing output] test child finished with -1 ---- end ---- perf record tests: FAILED! After: $ tools/perf/perf test -v 'record tests' 91: perf record tests : --- start --- test child forked, pid 120025 perf does not have symbol 'test_loop' perf is missing symbols - skipping test test child finished with -2 ---- end ---- perf record tests: Skip Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index c74412c68e65..3d1a7759a7b2 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -8,10 +8,16 @@ shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh . "${shelldir}"/lib/waiting.sh +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +testsym="test_loop" + +skip_test_missing_symbol ${testsym} + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog="perf test -w thloop" -testsym="test_loop" cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" br_cntr_file="/caps/branch_counter_nr" br_cntr_output="branch stack counters" -- cgit v1.2.3 From fc1de29a8b8ad46b590b2d389b53b4ecf9758273 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:45 +0200 Subject: perf tests: Skip Arm64 callgraphs test if leafloop symbol is missing The test "Check Arm64 callgraphs are complete in fp mode" depends on finding symbol leafloop in perf, and fails if perf has been stripped and no debug object is available. In that case, skip the test instead. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_callgraph_fp.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index 66dfdfdad553..e342e6c8aa50 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -2,8 +2,14 @@ # Check Arm64 callgraphs are complete in fp mode # SPDX-License-Identifier: GPL-2.0 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + lscpu | grep -q "aarch64" || exit 2 +skip_test_missing_symbol leafloop + PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) TEST_PROGRAM="perf test -w leafloop" -- cgit v1.2.3 From fcfb5a6189f55669c931dce9fec85280655c515f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:46 +0200 Subject: perf tests: Skip branch stack sampling test if brstack_bench symbol is missing The test "Check branch stack sampling" depends on finding symbol brstack_bench (and several others) in perf, and fails if perf has been stripped and no debug object is available. In that case, skip the test instead. Example: Before: $ strip tools/perf/perf $ tools/perf/perf buildid-cache -p `realpath tools/perf/perf` $ tools/perf/perf test -v 'branch stack sampling' 112: Check branch stack sampling : --- start --- test child forked, pid 123741 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$ /tmp/__perf_test.program.5Dz1U/perf.script + cleanup + rm -rf /tmp/__perf_test.program.5Dz1U test child finished with -1 ---- end ---- Check branch stack sampling: FAILED! After: $ tools/perf/perf test -v 'branch stack sampling' 112: Check branch stack sampling : --- start --- test child forked, pid 125157 perf does not have symbol 'brstack_bench' perf is missing symbols - skipping test test child finished with -2 ---- end ---- Check branch stack sampling: Skip Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_brstack.sh | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 09908d71c994..5f14d0cb013f 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -4,6 +4,10 @@ # SPDX-License-Identifier: GPL-2.0 # German Gomez , 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + # skip the test if the hardware doesn't support branch stack sampling # and if the architecture doesn't support filter types: any,save_type,u if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then @@ -11,6 +15,8 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev exit 2 fi +skip_test_missing_symbol brstack_bench + TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) TESTPROG="perf test -w brstack" -- cgit v1.2.3 From 3b24b15cf6fb2dbe1d009a52c9ddcb7721503d8f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:47 +0200 Subject: perf tests: Make data symbol test wait for perf to start The perf data symbol test waits 1 second for perf to run and collect data, which may be too little if perf takes a long time to start up, which has been noticed on systems with many CPUs. Use existing wait_for_perf_to_start helper to wait for perf to start. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_data_symbol.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 69bb6fe86c50..e50e54e94f6f 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -4,6 +4,10 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan , 2022 +shelldir=$(dirname "$0") +# shellcheck source=lib/waiting.sh +. "${shelldir}"/lib/waiting.sh + skip_if_no_mem_event() { perf mem record -e list 2>&1 | grep -E -q 'available' && return 0 return 2 @@ -13,6 +17,7 @@ skip_if_no_mem_event || exit 2 TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX) check_result() { # The memory report format is as below: @@ -50,13 +55,15 @@ echo "Recording workload..." # specific CPU and test in per-CPU mode. is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo) if (($is_amd >= 1)); then - perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM & + perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}" & else - perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM & + perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}" & fi PERFPID=$! +wait_for_perf_to_start ${PERFPID} "${ERR_FILE}" + sleep 1 kill $PERFPID -- cgit v1.2.3 From 124bf6360ad8fe9267017d10b5dd465d4af73247 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Nov 2023 09:58:48 +0200 Subject: perf tests: Skip data symbol test if buf1 symbol is missing perf data symbol test depends on finding symbol buf1 in perf, and fails if perf has been stripped and no debug object is available. In that case, skip the test instead. Example: Before: $ strip tools/perf/perf $ tools/perf/perf buildid-cache -p `realpath tools/perf/perf` $ tools/perf/perf test -v 'data symbol' 113: Test data symbol : --- start --- test child forked, pid 125646 Recording workload... [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.577 MB /tmp/__perf_test.perf.data.Jhbdp (7794 samples) ] Cleaning up files... test child finished with -1 ---- end ---- Test data symbol: FAILED! After: $ tools/perf/perf test -v 'data symbol' 113: Test data symbol : --- start --- test child forked, pid 125747 perf does not have symbol 'buf1' perf is missing symbols - skipping test test child finished with -2 ---- end ---- Test data symbol: Skip Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123075848.9652-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_data_symbol.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index e50e54e94f6f..3dfa91832aa8 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -8,6 +8,9 @@ shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh . "${shelldir}"/lib/waiting.sh +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + skip_if_no_mem_event() { perf mem record -e list 2>&1 | grep -E -q 'available' && return 0 return 2 @@ -15,6 +18,8 @@ skip_if_no_mem_event() { skip_if_no_mem_event || exit 2 +skip_test_missing_symbol buf1 + TEST_PROGRAM="perf test -w datasym" PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX) ERR_FILE=$(mktemp /tmp/__perf_test.stderr.XXXXX) -- cgit v1.2.3 From 19dd49c9337a482325d4dd7000e328dac11f6b5c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 9 Nov 2023 15:27:32 -0800 Subject: perf vendor events: Add skx, clx, icx and spr upi bandwidth metric Add upi_data_receive_bw metric for skylakex, cascadelakex, icelakex and sapphirerapids. The metric was added to perfmon metrics in: https://github.com/intel/perfmon/pull/119 Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231109232732.2973015-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 6 ++++++ tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json | 6 ++++++ tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json | 6 ++++++ tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 6 ++++++ 4 files changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 84c132af3dfa..8bc6c0707856 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -1862,6 +1862,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index e98602c66707..71d78a7841ea 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -1846,6 +1846,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index 06c6d67cb76b..e31a4aac9f20 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -1964,6 +1964,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 4a8f8eeb7525..ec3aa5ef00a3 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -1806,6 +1806,12 @@ "MetricName": "uncore_frequency", "ScaleUnit": "1GHz" }, + { + "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data receive bandwidth (MB/sec)", + "MetricExpr": "UNC_UPI_RxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", + "MetricName": "upi_data_receive_bw", + "ScaleUnit": "1MB/s" + }, { "BriefDescription": "Intel(R) Ultra Path Interconnect (UPI) data transmit bandwidth (MB/sec)", "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 7.111111111111111 / 1e6 / duration_time", -- cgit v1.2.3 From 7340c6df49df1b261892d287444c255d0a378063 Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Wed, 22 Nov 2023 20:41:06 +0800 Subject: perf vendor events riscv: add T-HEAD C9xx JSON file Add JSON file of T-HEAD C9xx series events. The event idx (raw value) is summary as following: event id range | support cpu 0x01 - 0x2a | c906,c910,c920 The event ids are based on the public document of T-HEAD and cover the c900 series. These events are the max that c900 series support. Since T-HEAD let manufacturers decide whether events are usable, the final support of the perf events is determined by the pmu node of the soc dtb. Signed-off-by: Inochi Amaoto Tested-by: Guo Ren Acked-by: Palmer Dabbelt Cc: Adrian Hunter Cc: Albert Ou Cc: Alexander Shishkin Cc: Chen Wang Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jisheng Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Wei Fu Cc: linux-riscv@lists.infradead.org Link: https://lore.kernel.org/r/IA1PR20MB495325FCF603BAA841E29281BBBAA@IA1PR20MB4953.namprd20.prod.outlook.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/riscv/mapfile.csv | 1 + .../arch/riscv/thead/c900-legacy/cache.json | 67 ++++++++++++++++++ .../arch/riscv/thead/c900-legacy/firmware.json | 68 ++++++++++++++++++ .../arch/riscv/thead/c900-legacy/instruction.json | 72 +++++++++++++++++++ .../arch/riscv/thead/c900-legacy/microarch.json | 80 ++++++++++++++++++++++ 5 files changed, 288 insertions(+) create mode 100644 tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json create mode 100644 tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json create mode 100644 tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json create mode 100644 tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv index 56b03138d46a..cfc449b19810 100644 --- a/tools/perf/pmu-events/arch/riscv/mapfile.csv +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -15,4 +15,5 @@ # #MVENDORID-MARCHID-MIMPID,Version,Filename,EventType 0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core +0x5b7-0x0-0x0,v1,thead/c900-legacy,core 0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json new file mode 100644 index 000000000000..2b142348d635 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/cache.json @@ -0,0 +1,67 @@ +[ + { + "EventName": "L1_ICACHE_ACCESS", + "EventCode": "0x00000001", + "BriefDescription": "L1 instruction cache access" + }, + { + "EventName": "L1_ICACHE_MISS", + "EventCode": "0x00000002", + "BriefDescription": "L1 instruction cache miss" + }, + { + "EventName": "ITLB_MISS", + "EventCode": "0x00000003", + "BriefDescription": "I-UTLB miss" + }, + { + "EventName": "DTLB_MISS", + "EventCode": "0x00000004", + "BriefDescription": "D-UTLB miss" + }, + { + "EventName": "JTLB_MISS", + "EventCode": "0x00000005", + "BriefDescription": "JTLB miss" + }, + { + "EventName": "L1_DCACHE_READ_ACCESS", + "EventCode": "0x0000000c", + "BriefDescription": "L1 data cache read access" + }, + { + "EventName": "L1_DCACHE_READ_MISS", + "EventCode": "0x0000000d", + "BriefDescription": "L1 data cache read miss" + }, + { + "EventName": "L1_DCACHE_WRITE_ACCESS", + "EventCode": "0x0000000e", + "BriefDescription": "L1 data cache write access" + }, + { + "EventName": "L1_DCACHE_WRITE_MISS", + "EventCode": "0x0000000f", + "BriefDescription": "L1 data cache write miss" + }, + { + "EventName": "LL_CACHE_READ_ACCESS", + "EventCode": "0x00000010", + "BriefDescription": "LL Cache read access" + }, + { + "EventName": "LL_CACHE_READ_MISS", + "EventCode": "0x00000011", + "BriefDescription": "LL Cache read miss" + }, + { + "EventName": "LL_CACHE_WRITE_ACCESS", + "EventCode": "0x00000012", + "BriefDescription": "LL Cache write access" + }, + { + "EventName": "LL_CACHE_WRITE_MISS", + "EventCode": "0x00000013", + "BriefDescription": "LL Cache write miss" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json new file mode 100644 index 000000000000..c822b5373333 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/instruction.json @@ -0,0 +1,72 @@ +[ + { + "EventName": "INST_BRANCH_MISPREDICT", + "EventCode": "0x00000006", + "BriefDescription": "Mispredicted branch instructions" + }, + { + "EventName": "INST_BRANCH", + "EventCode": "0x00000007", + "BriefDescription": "Retired branch instructions" + }, + { + "EventName": "INST_JMP_MISPREDICT", + "EventCode": "0x00000008", + "BriefDescription": "Indirect branch mispredict" + }, + { + "EventName": "INST_JMP", + "EventCode": "0x00000009", + "BriefDescription": "Retired jmp instructions" + }, + { + "EventName": "INST_STORE", + "EventCode": "0x0000000b", + "BriefDescription": "Retired store instructions" + }, + { + "EventName": "INST_ALU", + "EventCode": "0x0000001d", + "BriefDescription": "Retired ALU instructions" + }, + { + "EventName": "INST_LDST", + "EventCode": "0x0000001e", + "BriefDescription": "Retired Load/Store instructions" + }, + { + "EventName": "INST_VECTOR", + "EventCode": "0x0000001f", + "BriefDescription": "Retired Vector instructions" + }, + { + "EventName": "INST_CSR", + "EventCode": "0x00000020", + "BriefDescription": "Retired CSR instructions" + }, + { + "EventName": "INST_SYNC", + "EventCode": "0x00000021", + "BriefDescription": "Retired sync instructions (AMO/LR/SC instructions)" + }, + { + "EventName": "INST_UNALIGNED_ACCESS", + "EventCode": "0x00000022", + "BriefDescription": "Retired Store/Load instructions with unaligned memory access" + }, + { + "EventName": "INST_ECALL", + "EventCode": "0x00000025", + "BriefDescription": "Retired ecall instructions" + }, + { + "EventName": "INST_LONG_JP", + "EventCode": "0x00000026", + "BriefDescription": "Retired long jump instructions" + }, + { + "EventName": "INST_FP", + "EventCode": "0x0000002a", + "BriefDescription": "Retired FPU instructions" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json new file mode 100644 index 000000000000..0ab6f288af91 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/microarch.json @@ -0,0 +1,80 @@ +[ + { + "EventName": "LSU_SPEC_FAIL", + "EventCode": "0x0000000a", + "BriefDescription": "LSU speculation fail" + }, + { + "EventName": "IDU_RF_PIPE_FAIL", + "EventCode": "0x00000014", + "BriefDescription": "Instruction decode unit launch pipeline failed in RF state" + }, + { + "EventName": "IDU_RF_REG_FAIL", + "EventCode": "0x00000015", + "BriefDescription": "Instruction decode unit launch register file fail in RF state" + }, + { + "EventName": "IDU_RF_INSTRUCTION", + "EventCode": "0x00000016", + "BriefDescription": "retired instruction count of Instruction decode unit in RF (Register File) stage" + }, + { + "EventName": "LSU_4K_STALL", + "EventCode": "0x00000017", + "BriefDescription": "LSU stall times for long distance data access (Over 4K)", + "PublicDescription": "This stall occurs when translate virtual address with page offset over 4k" + }, + { + "EventName": "LSU_OTHER_STALL", + "EventCode": "0x00000018", + "BriefDescription": "LSU stall times for other reasons (except the 4k stall)" + }, + { + "EventName": "LSU_SQ_OTHER_DIS", + "EventCode": "0x00000019", + "BriefDescription": "LSU store queue discard others" + }, + { + "EventName": "LSU_SQ_DATA_DISCARD", + "EventCode": "0x0000001a", + "BriefDescription": "LSU store queue discard data (uops)" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0000001b", + "BriefDescription": "Branch misprediction in BTB" + }, + { + "EventName": "BRANCH_DIRECTION_PREDICTION", + "EventCode": "0x0000001c", + "BriefDescription": "All branch prediction in BTB", + "PublicDescription": "This event including both successful prediction and failed prediction in BTB" + }, + { + "EventName": "INTERRUPT_ACK_COUNT", + "EventCode": "0x00000023", + "BriefDescription": "acknowledged interrupt count" + }, + { + "EventName": "INTERRUPT_OFF_CYCLE", + "EventCode": "0x00000024", + "BriefDescription": "PLIC arbitration time when the interrupt is not responded", + "PublicDescription": "The arbitration time is recorded while meeting any of the following:\n- CPU is M-mode and MIE == 0\n- CPU is S-mode and delegation and SIE == 0\n" + }, + { + "EventName": "IFU_STALLED_CYCLE", + "EventCode": "0x00000027", + "BriefDescription": "Number of stall cycles of the instruction fetch unit (IFU)." + }, + { + "EventName": "IDU_STALLED_CYCLE", + "EventCode": "0x00000028", + "BriefDescription": "hpcp_backend_stall Number of stall cycles of the instruction decoding unit (IDU) and next-level pipeline unit." + }, + { + "EventName": "SYNC_STALL", + "EventCode": "0x00000029", + "BriefDescription": "Sync instruction stall cycle fence/fence.i/sync/sfence" + } +] -- cgit v1.2.3 From ffa96259ca5f02bbcecd2c45831e7632cd6d3485 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 13 Nov 2023 10:23:24 +0000 Subject: perf test: Use existing config value for objdump path There is already an existing config value for changing the objdump path, so instead of having two values that do the same thing, make 'perf test' use annotate.objdump as well. Signed-off-by: James Clark Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fangrui Song Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Peter Zijlstra Cc: Yang Jihong Cc: Yonghong Song Link: https://lore.kernel.org/r/ZU5Cx4LTrB5q0sIG@kernel.org Link: https://lore.kernel.org/r/20231113102327.695386-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 8 ++------ tools/perf/tests/builtin-test.c | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 16398babd1ef..379f9d7a8ab1 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -251,7 +251,8 @@ annotate.*:: addr2line binary to use for file names and line numbers. annotate.objdump:: - objdump binary to use for disassembly and annotations. + objdump binary to use for disassembly and annotations, + including in the 'perf test' command. annotate.disassembler_style:: Use this to change the default disassembler style to some other value @@ -722,11 +723,6 @@ session-.*:: Defines new record session for daemon. The value is record's command line without the 'record' keyword. -test.*:: - - test.objdump:: - objdump binary to use for disassembly and annotations. - SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 113e92119e1d..b8c21e81a021 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -518,7 +518,7 @@ static int run_workload(const char *work, int argc, const char **argv) static int perf_test__config(const char *var, const char *value, void *data __maybe_unused) { - if (!strcmp(var, "test.objdump")) + if (!strcmp(var, "annotate.objdump")) test_objdump_path = value; return 0; -- cgit v1.2.3 From 08973307d28311505b85216d724826e2ca21759e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 11 Oct 2023 20:50:25 -0700 Subject: perf annotate: Check if operand has multiple regs It needs to check all possible information in an instruction. Let's add a field indicating if the operand has multiple registers. I'll be used to search type information like in an array access on x86 like: mov 0x10(%rax,%rbx,8), %rcx ------------- here Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231012035111.676789-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 36 ++++++++++++++++++++++++++++++++++++ tools/perf/util/annotate.h | 2 ++ 2 files changed, 38 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3364edf30f50..9a828dc601c7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -85,6 +85,8 @@ struct arch { struct { char comment_char; char skip_functions_char; + char register_char; + char memory_ref_char; } objdump; }; @@ -188,6 +190,8 @@ static struct arch architectures[] = { .insn_suffix = "bwlq", .objdump = { .comment_char = '#', + .register_char = '%', + .memory_ref_char = '(', }, }, { @@ -566,6 +570,34 @@ static struct ins_ops lock_ops = { .scnprintf = lock__scnprintf, }; +/* + * Check if the operand has more than one registers like x86 SIB addressing: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count = 0; + + if (arch->objdump.register_char == 0) + return false; + + if (arch->objdump.memory_ref_char) { + op = strchr(op, arch->objdump.memory_ref_char); + if (op == NULL) + return false; + } + + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { + count++; + op++; + } + + return count > 1; +} + static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -593,6 +625,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); @@ -613,6 +647,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->target.raw == NULL) goto out_free_source; + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); + if (comment == NULL) return 0; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index bc8b95e8b1be..b64a2be287b3 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -39,12 +39,14 @@ struct ins_operands { s64 offset; bool offset_avail; bool outside; + bool multi_regs; } target; union { struct { char *raw; char *name; u64 addr; + bool multi_regs; } source; struct { struct ins ins; -- cgit v1.2.3 From 876843ce1e4897e8ceade50bfa3d9a4ec483abf3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 10:20:56 -0800 Subject: bpftool: mark orphaned programs during prog show Commit ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") stopped removing program's id from idr when the offloaded/bound netdev goes away. I was supposed to take a look and check in [0], but apparently I did not. Martin points out it might be useful to keep it that way for observability sake, but we at least need to mark those programs as unusable. Mark those programs as 'orphaned' and keep printing the list when we encounter ENODEV. 0: unspec tag 0000000000000000 xlated 0B not jited memlock 4096B orphaned [0]: https://lore.kernel.org/all/CAKH8qBtyR20ZWAc11z1-6pGb3Hd47AQUTbE_cfoktG59TqaJ7Q@mail.gmail.com/ v3: * use two spaces for " orphaned" (Quentin) Cc: netdev@vger.kernel.org Fixes: ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") Signed-off-by: Stanislav Fomichev Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20231127182057.1081138-1-sdf@google.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/prog.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 7ec4f5671e7a..feb8e305804f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -442,7 +442,7 @@ static void print_prog_header_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses); } -static void print_prog_json(struct bpf_prog_info *info, int fd) +static void print_prog_json(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -461,6 +461,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "uid", info->created_by_uid); } + jsonw_bool_field(json_wtr, "orphaned", orphaned); jsonw_uint_field(json_wtr, "bytes_xlated", info->xlated_prog_len); if (info->jited_prog_len) { @@ -527,7 +528,7 @@ static void print_prog_header_plain(struct bpf_prog_info *info, int fd) printf("\n"); } -static void print_prog_plain(struct bpf_prog_info *info, int fd) +static void print_prog_plain(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -554,6 +555,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf(" memlock %sB", memlock); free(memlock); + if (orphaned) + printf(" orphaned"); + if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); @@ -581,15 +585,15 @@ static int show_prog(int fd) int err; err = bpf_prog_get_info_by_fd(fd, &info, &len); - if (err) { + if (err && err != -ENODEV) { p_err("can't get prog info: %s", strerror(errno)); return -1; } if (json_output) - print_prog_json(&info, fd); + print_prog_json(&info, fd, err == -ENODEV); else - print_prog_plain(&info, fd); + print_prog_plain(&info, fd, err == -ENODEV); return 0; } -- cgit v1.2.3 From cf9791631027a476f7cdb0e1b3ac6add16eff264 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 10:20:57 -0800 Subject: selftests/bpf: update test_offload to use new orphaned property - filter orphaned programs by default - when trying to query orphaned program, don't expect bpftool failure Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127182057.1081138-2-sdf@google.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_offload.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 40cba8d368d9..6157f884d091 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -169,12 +169,14 @@ def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False): return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail, include_stderr=include_stderr) -def bpftool_prog_list(expected=None, ns=""): +def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) # Remove the base progs for p in base_progs: if p in progs: progs.remove(p) + if exclude_orphaned: + progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -612,11 +614,9 @@ def pin_map(file_name, idx=0, expected=1): def check_dev_info_removed(prog_file=None, map_file=None): bpftool_prog_list(expected=0) + bpftool_prog_list(expected=1, exclude_orphaned=False) ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - (err["error"])) + fail(ret != 0, "failed to show prog with removed device") bpftool_map_list(expected=0) ret, err = bpftool("map show pin %s" % (map_file), fail=False) @@ -1395,10 +1395,7 @@ try: start_test("Test multi-dev ASIC cross-dev destruction - orphaned...") ret, out = bpftool("prog show %s" % (progB), fail=False) - fail(ret == 0, "got information about orphaned program") - fail("error" not in out, "no error reported for get info on orphaned") - fail(out["error"] != "can't get prog info: No such device", - "wrong error for get info on orphaned") + fail(ret != 0, "couldn't get information about orphaned program") print("%s: OK" % (os.path.basename(__file__))) -- cgit v1.2.3 From 088559815477c6f623a5db5993491ddd7facbec7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:19 -0500 Subject: selftests/net: ipsec: fix constant out of range Fix a small compiler warning. nr_process must be a signed long: it is assigned a signed long by strtol() and is compared against LONG_MIN and LONG_MAX. ipsec.c:2280:65: error: result of comparison of constant -9223372036854775808 with expression of type 'unsigned int' is always false [-Werror,-Wtautological-constant-out-of-range-compare] if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) Fixes: bc2652b7ae1e ("selftest/net/xfrm: Add test for ipsec tunnel") Signed-off-by: Willem de Bruijn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20231124171645.1011043-2-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/ipsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 9a8229abfa02..be4a30a0d02a 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -2263,7 +2263,7 @@ static int check_results(void) int main(int argc, char **argv) { - unsigned int nr_process = 1; + long nr_process = 1; int route_sock = -1, ret = KSFT_SKIP; int test_desc_fd[2]; uint32_t route_seq; @@ -2284,7 +2284,7 @@ int main(int argc, char **argv) exit_usage(argv); } - if (nr_process > MAX_PROCESSES || !nr_process) { + if (nr_process > MAX_PROCESSES || nr_process < 1) { printk("nr_process should be between [1; %u]", MAX_PROCESSES); exit_usage(argv); -- cgit v1.2.3 From 7b29828c5af6841bdeb9fafa32fdfeff7ab9c407 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:20 -0500 Subject: selftests/net: fix a char signedness issue Signedness of char is signed on x86_64, but unsigned on arm64. Fix the warning building cmsg_sender.c on signed platforms or forced with -fsigned-char: msg_sender.c:455:12: error: implicit conversion from 'int' to 'char' changes value from 128 to -128 [-Werror,-Wconstant-conversion] buf[0] = ICMPV6_ECHO_REQUEST; constant ICMPV6_ECHO_REQUEST is 128. Link: https://lwn.net/Articles/911914 Fixes: de17e305a810 ("selftests: net: cmsg_sender: support icmp and raw sockets") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-3-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..6ff3e732f449 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -416,9 +416,9 @@ int main(int argc, char *argv[]) { struct addrinfo hints, *ai; struct iovec iov[1]; + unsigned char *buf; struct msghdr msg; char cbuf[1024]; - char *buf; int err; int fd; -- cgit v1.2.3 From 59fef379d453781f0dabfa1f1a1e86e78aee919a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:21 -0500 Subject: selftests/net: unix: fix unused variable compiler warning Remove an unused variable. diag_uid.c:151:24: error: unused variable 'udr' [-Werror,-Wunused-variable] Fixes: ac011361bd4f ("af_unix: Add test for sock_diag and UDIAG_SHOW_UID.") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231124171645.1011043-4-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/diag_uid.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 5b88f7129fea..79a3dd75590e 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata, .msg_iov = &iov, .msg_iovlen = 1 }; - struct unix_diag_req *udr; struct nlmsghdr *nlh; int ret; -- cgit v1.2.3 From 00a4f8fd9c750f20d8fd4535c71c9caa7ef5ff2f Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Nov 2023 12:15:22 -0500 Subject: selftests/net: mptcp: fix uninitialized variable warnings Same init_rng() in both tests. The function reads /dev/urandom to initialize srand(). In case of failure, it falls back onto the entropy in the uninitialized variable. Not sure if this is on purpose. But failure reading urandom should be rare, so just fail hard. While at it, convert to getrandom(). Which man 4 random suggests is simpler and more robust. mptcp_inq.c:525:6: mptcp_connect.c:1131:6: error: variable 'foo' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Fixes: b51880568f20 ("selftests: mptcp: add inq test case") Cc: Florian Westphal Signed-off-by: Willem de Bruijn ---- When input is randomized because this is expected to meaningfully explore edge cases, should we also add 1. logging the random seed to stdout and 2. adding a command line argument to replay from a specific seed I can do this in net-next, if authors find it useful in this case. Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/20231124171645.1011043-5-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 11 ++++------- tools/testing/selftests/net/mptcp/mptcp_inq.c | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c7f9ebeebc2c..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -1125,15 +1126,11 @@ again: static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 8672d898f8cd..218aac467321 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -519,15 +520,11 @@ static int client(int unixfd) static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); -- cgit v1.2.3 From a79d8ba734bdbd2574ad16dd1b96506e5f642c4a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:44 -0300 Subject: selftests: tc-testing: remove buildebpf plugin As tdc only tests loading/deleting and anything more complicated is better left to the ebpf test suite, provide a pre-compiled version of 'action.c' and don't bother compiling it in kselftests or on the fly at all. Cc: Davide Caratti Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/Makefile | 29 +-------- tools/testing/selftests/tc-testing/README | 2 - tools/testing/selftests/tc-testing/action-ebpf | Bin 0 -> 856 bytes .../tc-testing/plugin-lib/buildebpfPlugin.py | 67 --------------------- .../selftests/tc-testing/tc-tests/actions/bpf.json | 14 ++--- .../selftests/tc-testing/tc-tests/filters/bpf.json | 10 ++- tools/testing/selftests/tc-testing/tdc.sh | 2 +- 7 files changed, 11 insertions(+), 113 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/action-ebpf delete mode 100644 tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index b1fa2e177e2f..e8b3dde4fa16 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,31 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../scripts/Makefile.include -top_srcdir = $(abspath ../../../..) -APIDIR := $(top_scrdir)/include/uapi -TEST_GEN_FILES = action.o +TEST_PROGS += ./tdc.sh +TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk - -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif - -CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - &1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') - -CLANG_FLAGS = -I. -I$(APIDIR) \ - $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types - -$(OUTPUT)/%.o: %.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 --target=bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ - -TEST_PROGS += ./tdc.sh -TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index be7b00799b3e..fc8e858ff119 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -195,8 +195,6 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) - - buildebpfPlugin.py: - builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf new file mode 100644 index 000000000000..4879479b2ee5 Binary files /dev/null and b/tools/testing/selftests/tc-testing/action-ebpf differ diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py deleted file mode 100644 index d34fe06268d2..000000000000 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ /dev/null @@ -1,67 +0,0 @@ -''' -build ebpf program -''' - -import os -import signal -from string import Template -import subprocess -import time -from TdcPlugin import TdcPlugin -from tdc_config import * - -class SubPlugin(TdcPlugin): - def __init__(self): - self.sub_class = 'buildebpf/SubPlugin' - self.tap = '' - super().__init__() - - def pre_suite(self, testcount, testidlist): - super().pre_suite(testcount, testidlist) - - if self.args.buildebpf: - self._ebpf_makeall() - - def post_suite(self, index): - super().post_suite(index) - - self._ebpf_makeclean() - - def add_args(self, parser): - super().add_args(parser) - - self.argparser_group = self.argparser.add_argument_group( - 'buildebpf', - 'options for buildebpfPlugin') - self.argparser_group.add_argument( - '--nobuildebpf', action='store_false', default=True, - dest='buildebpf', - help='Don\'t build eBPF programs') - - return self.argparser - - def _ebpf_makeall(self): - if self.args.buildebpf: - self._make('all') - - def _ebpf_makeclean(self): - if self.args.buildebpf: - self._make('clean') - - def _make(self, target): - command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) - proc = subprocess.Popen(command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ.copy()) - (rawout, serr) = proc.communicate() - - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") - - proc.stdout.close() - proc.stderr.close() - return proc, foutput diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 91832400ddbd..6e00bf32ef9a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -54,9 +54,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -65,10 +62,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" @@ -81,9 +78,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -92,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json index 013fb983bc3f..725d406a30ac 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json @@ -52,17 +52,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" @@ -77,17 +76,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko", "expExitCode": "1", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "0", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 4dbe50bde5a0..407fa53822a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -64,5 +64,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` -c actions --nobuildebpf +./tdc.py -J`nproc` -c actions ./tdc.py -J`nproc` -c qdisc -- cgit v1.2.3 From 8059e68b99280cc9a224593f3142f9368229c6ee Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:45 -0300 Subject: selftests: tc-testing: remove unnecessary time.sleep This operation is redundant and it's not stabilizing nor waiting for anything. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 669ec89ebfe1..c5ec861687b6 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist): pm.call_post_suite(1) return emergency_exit_message - if args.verbose: - print('give test rig 2 seconds to stabilize') - - time.sleep(2) - def purge_run(pm, index): pm.call_post_suite(index) -- cgit v1.2.3 From 56e16bc69bb7d36a931111d8abdcd44b939751c4 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:46 -0300 Subject: selftests: tc-testing: prefix iproute2 functions with "ipr2" As suggested by Simon, prefix the functions that operate on iproute2 commands in contrast with the "nl" netlink prefix. Cc: Simon Horman Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 65c8f3f983b9..dc7a0597cf44 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -37,7 +37,7 @@ class SubPlugin(TdcPlugin): if netlink == True: self._nl_ns_create() else: - self._ns_create() + self._ipr2_ns_create() # Make sure the netns is visible in the fs ticks = 20 @@ -71,7 +71,7 @@ class SubPlugin(TdcPlugin): if netlink == True: self._nl_ns_destroy() else: - self._ns_destroy() + self._ipr2_ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -161,7 +161,7 @@ class SubPlugin(TdcPlugin): ticks -= 1 continue - def _ns_create_cmds(self): + def _ipr2_ns_create_cmds(self): cmds = [] ns = self.args.NAMES['NS'] @@ -181,26 +181,26 @@ class SubPlugin(TdcPlugin): return cmds - def _ns_create(self): + def _ipr2_ns_create(self): ''' Create the network namespace in which the tests will be run and set up the required network devices for it. ''' - self._exec_cmd_batched('pre', self._ns_create_cmds()) + self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds()) def _nl_ns_destroy(self): ns = self.args.NAMES['NS'] netns.remove(ns) - def _ns_destroy_cmd(self): + def _ipr2_ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) - def _ns_destroy(self): + def _ipr2_ns_destroy(self): ''' Destroy the network namespace for testing (and any associated network devices as well) ''' - self._exec_cmd('post', self._ns_destroy_cmd()) + self._exec_cmd('post', self._ipr2_ns_destroy_cmd()) @cached_property def _proc(self): -- cgit v1.2.3 From 501679f5d4a433144ae755dd2e5f757b1ce5a152 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:47 -0300 Subject: selftests: tc-testing: cleanup on Ctrl-C Cleanup net namespaces and other resources if we get a SIGINT (Ctrl-C). As user visible resources are allocated on a per test basis, it's only required to catch this condition when (possibly) running tests. So far calling post_suite is enough to free up anything that might linger. A missing keyword replacement for nsPlugin is also included. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 2 +- tools/testing/selftests/tc-testing/tdc.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index dc7a0597cf44..77b1106b8388 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -78,7 +78,7 @@ class SubPlugin(TdcPlugin): print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - cmd = "$IP -a netns del" + cmd = self._replace_keywords("$IP -a netns del") if self.args.verbose > 3: print('_exec_cmd: command "{}"'.format(cmd)) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index c5ec861687b6..caeacc691587 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -1018,7 +1018,11 @@ def main(): if args.verbose > 2: print('args is {}'.format(args)) - set_operation_mode(pm, parser, args, remaining) + try: + set_operation_mode(pm, parser, args, remaining) + except KeyboardInterrupt: + # Cleanup on Ctrl-C + pm.call_post_suite(None) if __name__ == "__main__": main() -- cgit v1.2.3 From ed346fccfc40364888601a2ec75dd94f4dca23bd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:48 -0300 Subject: selftests: tc-testing: remove unused import Remove this leftover from the times we pre-allocated everything Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 77b1106b8388..bb19b8b76d3b 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -23,8 +23,6 @@ class SubPlugin(TdcPlugin): super().__init__() def pre_suite(self, testcount, testlist): - from itertools import cycle - super().pre_suite(testcount, testlist) def prepare_test(self, test): -- cgit v1.2.3 From 637567e4a3ef6f6a5ffa48781207d270265f7e68 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:40 -0800 Subject: tools: ynl: add sample for getting page-pool information Regenerate the tools/ code after netdev spec changes. Add sample to query page-pool info in a concise fashion: $ ./page-pool eth0[2] page pools: 10 (zombies: 0) refs: 41984 bytes: 171966464 (refs: 0 bytes: 0) recycling: 90.3% (alloc: 656:397681 recycle: 89652:270201) Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- tools/include/uapi/linux/netdev.h | 36 +++ tools/net/ynl/generated/netdev-user.c | 419 ++++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 171 ++++++++++++++ tools/net/ynl/lib/ynl.h | 2 +- tools/net/ynl/samples/.gitignore | 1 + tools/net/ynl/samples/Makefile | 2 +- tools/net/ynl/samples/page-pool.c | 147 ++++++++++++ 7 files changed, 776 insertions(+), 2 deletions(-) create mode 100644 tools/net/ynl/samples/page-pool.c (limited to 'tools') diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 2943a151d4f1..2b37233e00c0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -64,16 +64,52 @@ enum { NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) }; +enum { + NETDEV_A_PAGE_POOL_ID = 1, + NETDEV_A_PAGE_POOL_IFINDEX, + NETDEV_A_PAGE_POOL_NAPI_ID, + NETDEV_A_PAGE_POOL_INFLIGHT, + NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + NETDEV_A_PAGE_POOL_DETACH_TIME, + + __NETDEV_A_PAGE_POOL_MAX, + NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) +}; + +enum { + NETDEV_A_PAGE_POOL_STATS_INFO = 1, + NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + + __NETDEV_A_PAGE_POOL_STATS_MAX, + NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, NETDEV_CMD_DEV_DEL_NTF, NETDEV_CMD_DEV_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_GET, + NETDEV_CMD_PAGE_POOL_ADD_NTF, + NETDEV_CMD_PAGE_POOL_DEL_NTF, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_STATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) }; #define NETDEV_MCGRP_MGMT "mgmt" +#define NETDEV_MCGRP_PAGE_POOL "page-pool" #endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index b5ffe8cd1144..a7b7019d00f1 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -18,6 +18,11 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf", [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf", [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf", + [NETDEV_CMD_PAGE_POOL_GET] = "page-pool-get", + [NETDEV_CMD_PAGE_POOL_ADD_NTF] = "page-pool-add-ntf", + [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", + [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", + [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", }; const char *netdev_op_str(int op) @@ -59,6 +64,16 @@ const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) } /* Policies */ +struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_page_pool_info_nest = { + .max_attr = NETDEV_A_PAGE_POOL_MAX, + .table = netdev_page_pool_info_policy, +}; + struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, @@ -72,7 +87,85 @@ struct ynl_policy_nest netdev_dev_nest = { .table = netdev_dev_policy, }; +struct ynl_policy_attr netdev_page_pool_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_PAGE_POOL_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_INFLIGHT] = { .name = "inflight", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_INFLIGHT_MEM] = { .name = "inflight-mem", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_DETACH_TIME] = { .name = "detach-time", .type = YNL_PT_UINT, }, +}; + +struct ynl_policy_nest netdev_page_pool_nest = { + .max_attr = NETDEV_A_PAGE_POOL_MAX, + .table = netdev_page_pool_policy, +}; + +struct ynl_policy_attr netdev_page_pool_stats_policy[NETDEV_A_PAGE_POOL_STATS_MAX + 1] = { + [NETDEV_A_PAGE_POOL_STATS_INFO] = { .name = "info", .type = YNL_PT_NEST, .nest = &netdev_page_pool_info_nest, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST] = { .name = "alloc-fast", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW] = { .name = "alloc-slow", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER] = { .name = "alloc-slow-high-order", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY] = { .name = "alloc-empty", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL] = { .name = "alloc-refill", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE] = { .name = "alloc-waive", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED] = { .name = "recycle-cached", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL] = { .name = "recycle-cache-full", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING] = { .name = "recycle-ring", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL] = { .name = "recycle-ring-full", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT] = { .name = "recycle-released-refcnt", .type = YNL_PT_UINT, }, +}; + +struct ynl_policy_nest netdev_page_pool_stats_nest = { + .max_attr = NETDEV_A_PAGE_POOL_STATS_MAX, + .table = netdev_page_pool_stats_policy, +}; + /* Common nested types */ +void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) +{ +} + +int netdev_page_pool_info_put(struct nlmsghdr *nlh, unsigned int attr_type, + struct netdev_page_pool_info *obj) +{ + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, attr_type); + if (obj->_present.id) + mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, obj->id); + if (obj->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_PAGE_POOL_IFINDEX, obj->ifindex); + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct netdev_page_pool_info *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return 0; +} + /* ============== NETDEV_CMD_DEV_GET ============== */ /* NETDEV_CMD_DEV_GET - do */ void netdev_dev_get_req_free(struct netdev_dev_get_req *req) @@ -197,6 +290,314 @@ void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp) free(rsp); } +/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_GET - do */ +void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req) +{ + free(req); +} + +void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_page_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct netdev_page_pool_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_PAGE_POOL_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.napi_id = 1; + dst->napi_id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.inflight = 1; + dst->inflight = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT_MEM) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.inflight_mem = 1; + dst->inflight_mem = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_DETACH_TIME) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.detach_time = 1; + dst->detach_time = mnl_attr_get_uint(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_page_pool_get_rsp * +netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_page_pool_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); + ys->req_policy = &netdev_page_pool_nest; + yrs.yarg.rsp_policy = &netdev_page_pool_nest; + + if (req->_present.id) + mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_page_pool_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_page_pool_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_GET - dump */ +void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp) +{ + struct netdev_page_pool_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_page_pool_get_list * +netdev_page_pool_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_page_pool_get_list); + yds.cb = netdev_page_pool_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; + yds.rsp_policy = &netdev_page_pool_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_page_pool_get_list_free(yds.first); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_GET - notify */ +void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp) +{ + free(rsp); +} + +/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +void +netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req) +{ + netdev_page_pool_info_free(&req->info); + free(req); +} + +void +netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp) +{ + netdev_page_pool_info_free(&rsp->info); + free(rsp); +} + +int netdev_page_pool_stats_get_rsp_parse(const struct nlmsghdr *nlh, + void *data) +{ + struct netdev_page_pool_stats_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_STATS_INFO) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.info = 1; + + parg.rsp_policy = &netdev_page_pool_info_nest; + parg.data = &dst->info; + if (netdev_page_pool_info_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_fast = 1; + dst->alloc_fast = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_slow = 1; + dst->alloc_slow = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_slow_high_order = 1; + dst->alloc_slow_high_order = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_empty = 1; + dst->alloc_empty = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_refill = 1; + dst->alloc_refill = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_waive = 1; + dst->alloc_waive = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_cached = 1; + dst->recycle_cached = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_cache_full = 1; + dst->recycle_cache_full = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_ring = 1; + dst->recycle_ring = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_ring_full = 1; + dst->recycle_ring_full = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_released_refcnt = 1; + dst->recycle_released_refcnt = mnl_attr_get_uint(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_page_pool_stats_get_rsp * +netdev_page_pool_stats_get(struct ynl_sock *ys, + struct netdev_page_pool_stats_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_page_pool_stats_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); + ys->req_policy = &netdev_page_pool_stats_nest; + yrs.yarg.rsp_policy = &netdev_page_pool_stats_nest; + + if (req->_present.info) + netdev_page_pool_info_put(nlh, NETDEV_A_PAGE_POOL_STATS_INFO, &req->info); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_page_pool_stats_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_page_pool_stats_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ +void +netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp) +{ + struct netdev_page_pool_stats_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + netdev_page_pool_info_free(&rsp->obj.info); + free(rsp); + } +} + +struct netdev_page_pool_stats_get_list * +netdev_page_pool_stats_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_page_pool_stats_get_list); + yds.cb = netdev_page_pool_stats_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; + yds.rsp_policy = &netdev_page_pool_stats_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_page_pool_stats_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), @@ -216,6 +617,24 @@ static const struct ynl_ntf_info netdev_ntf_info[] = { .policy = &netdev_dev_nest, .free = (void *)netdev_dev_get_ntf_free, }, + [NETDEV_CMD_PAGE_POOL_ADD_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, + [NETDEV_CMD_PAGE_POOL_DEL_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, + [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, }; const struct ynl_family ynl_netdev_family = { diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 4fafac879df3..4093602c9b6c 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -21,6 +21,16 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); /* Common nested types */ +struct netdev_page_pool_info { + struct { + __u32 id:1; + __u32 ifindex:1; + } _present; + + __u64 id; + __u32 ifindex; +}; + /* ============== NETDEV_CMD_DEV_GET ============== */ /* NETDEV_CMD_DEV_GET - do */ struct netdev_dev_get_req { @@ -87,4 +97,165 @@ struct netdev_dev_get_ntf { void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp); +/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_GET - do */ +struct netdev_page_pool_get_req { + struct { + __u32 id:1; + } _present; + + __u64 id; +}; + +static inline struct netdev_page_pool_get_req * +netdev_page_pool_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_page_pool_get_req)); +} +void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req); + +static inline void +netdev_page_pool_get_req_set_id(struct netdev_page_pool_get_req *req, __u64 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_page_pool_get_rsp { + struct { + __u32 id:1; + __u32 ifindex:1; + __u32 napi_id:1; + __u32 inflight:1; + __u32 inflight_mem:1; + __u32 detach_time:1; + } _present; + + __u64 id; + __u32 ifindex; + __u64 napi_id; + __u64 inflight; + __u64 inflight_mem; + __u64 detach_time; +}; + +void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp); + +/* + * Get / dump information about Page Pools. +(Only Page Pools associated with a net_device can be listed.) + + */ +struct netdev_page_pool_get_rsp * +netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req); + +/* NETDEV_CMD_PAGE_POOL_GET - dump */ +struct netdev_page_pool_get_list { + struct netdev_page_pool_get_list *next; + struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp); + +struct netdev_page_pool_get_list * +netdev_page_pool_get_dump(struct ynl_sock *ys); + +/* NETDEV_CMD_PAGE_POOL_GET - notify */ +struct netdev_page_pool_get_ntf { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct netdev_page_pool_get_ntf *ntf); + struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp); + +/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +struct netdev_page_pool_stats_get_req { + struct { + __u32 info:1; + } _present; + + struct netdev_page_pool_info info; +}; + +static inline struct netdev_page_pool_stats_get_req * +netdev_page_pool_stats_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_page_pool_stats_get_req)); +} +void +netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req); + +static inline void +netdev_page_pool_stats_get_req_set_info_id(struct netdev_page_pool_stats_get_req *req, + __u64 id) +{ + req->_present.info = 1; + req->info._present.id = 1; + req->info.id = id; +} +static inline void +netdev_page_pool_stats_get_req_set_info_ifindex(struct netdev_page_pool_stats_get_req *req, + __u32 ifindex) +{ + req->_present.info = 1; + req->info._present.ifindex = 1; + req->info.ifindex = ifindex; +} + +struct netdev_page_pool_stats_get_rsp { + struct { + __u32 info:1; + __u32 alloc_fast:1; + __u32 alloc_slow:1; + __u32 alloc_slow_high_order:1; + __u32 alloc_empty:1; + __u32 alloc_refill:1; + __u32 alloc_waive:1; + __u32 recycle_cached:1; + __u32 recycle_cache_full:1; + __u32 recycle_ring:1; + __u32 recycle_ring_full:1; + __u32 recycle_released_refcnt:1; + } _present; + + struct netdev_page_pool_info info; + __u64 alloc_fast; + __u64 alloc_slow; + __u64 alloc_slow_high_order; + __u64 alloc_empty; + __u64 alloc_refill; + __u64 alloc_waive; + __u64 recycle_cached; + __u64 recycle_cache_full; + __u64 recycle_ring; + __u64 recycle_ring_full; + __u64 recycle_released_refcnt; +}; + +void +netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp); + +/* + * Get page pool statistics. + */ +struct netdev_page_pool_stats_get_rsp * +netdev_page_pool_stats_get(struct ynl_sock *ys, + struct netdev_page_pool_stats_get_req *req); + +/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ +struct netdev_page_pool_stats_get_list { + struct netdev_page_pool_stats_get_list *next; + struct netdev_page_pool_stats_get_rsp obj __attribute__((aligned(8))); +}; + +void +netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp); + +struct netdev_page_pool_stats_get_list * +netdev_page_pool_stats_get_dump(struct ynl_sock *ys); + #endif /* _LINUX_NETDEV_GEN_H */ diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index e974378e3b8c..075d868f3b57 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -239,7 +239,7 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); #ifndef MNL_HAS_AUTO_SCALARS static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) { - if (mnl_attr_get_len(attr) == 4) + if (mnl_attr_get_payload_len(attr) == 4) return mnl_attr_get_u32(attr); return mnl_attr_get_u64(attr); } diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index 2aae60c4829f..49637b26c482 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -1,3 +1,4 @@ ethtool devlink netdev +page-pool \ No newline at end of file diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 3dbb106e87d9..1afefc266b7a 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -18,7 +18,7 @@ include $(wildcard *.d) all: $(BINS) -$(BINS): ../lib/ynl.a ../generated/protos.a +$(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) @echo -e '\tCC sample $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o @$(LINK.c) $@.o -o $@ $(LDLIBS) diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c new file mode 100644 index 000000000000..18d359713469 --- /dev/null +++ b/tools/net/ynl/samples/page-pool.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include +#include + +#include + +#include + +#include "netdev-user.h" + +struct stat { + unsigned int ifc; + + struct { + unsigned int cnt; + size_t refs, bytes; + } live[2]; + + size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; +}; + +struct stats_array { + unsigned int i, max; + struct stat *s; +}; + +static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex) +{ + unsigned int i; + + for (i = 0; i < a->i; i++) { + if (a->s[i].ifc == ifindex) + return &a->s[i]; + } + + a->i++; + if (a->i == a->max) { + a->max *= 2; + a->s = reallocarray(a->s, a->max, sizeof(*a->s)); + } + a->s[i].ifc = ifindex; + return &a->s[i]; +} + +static void count(struct stat *s, unsigned int l, + struct netdev_page_pool_get_rsp *pp) +{ + s->live[l].cnt++; + if (pp->_present.inflight) + s->live[l].refs += pp->inflight; + if (pp->_present.inflight_mem) + s->live[l].bytes += pp->inflight_mem; +} + +int main(int argc, char **argv) +{ + struct netdev_page_pool_stats_get_list *pp_stats; + struct netdev_page_pool_get_list *pools; + struct stats_array a = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + a.max = 128; + a.s = calloc(a.max, sizeof(*a.s)); + if (!a.s) + goto err_close; + + pools = netdev_page_pool_get_dump(ys); + if (!pools) + goto err_free; + + ynl_dump_foreach(pools, pp) { + struct stat *s = find_ifc(&a, pp->ifindex); + + count(s, 1, pp); + if (pp->_present.destroyed) + count(s, 0, pp); + } + netdev_page_pool_get_list_free(pools); + + pp_stats = netdev_page_pool_stats_get_dump(ys); + if (!pp_stats) + goto err_free; + + ynl_dump_foreach(pp_stats, pp) { + struct stat *s = find_ifc(&a, pp->info.ifindex); + + if (pp->_present.alloc_fast) + s->alloc_fast += pp->alloc_fast; + if (pp->_present.alloc_slow) + s->alloc_slow += pp->alloc_slow; + if (pp->_present.recycle_ring) + s->recycle_ring += pp->recycle_ring; + if (pp->_present.recycle_cached) + s->recycle_cache += pp->recycle_cached; + } + netdev_page_pool_stats_get_list_free(pp_stats); + + for (unsigned int i = 0; i < a.i; i++) { + char ifname[IF_NAMESIZE]; + struct stat *s = &a.s[i]; + const char *name; + double recycle; + + if (!s->ifc) { + name = "\t"; + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + printf("%8s", name); + printf("[%d]\t", s->ifc); + } + + printf("page pools: %u (zombies: %u)\n", + s->live[1].cnt, s->live[0].cnt); + printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", + s->live[1].refs, s->live[1].bytes, + s->live[0].refs, s->live[0].bytes); + + /* We don't know how many pages are sitting in cache and ring + * so we will under-count the recycling rate a bit. + */ + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n", + recycle, s->alloc_slow, s->alloc_fast, + s->recycle_ring, s->recycle_cache); + } + + ynl_sock_destroy(ys); + return 0; + +err_free: + free(a.s); +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} -- cgit v1.2.3 From 877c737db9355acaa1ec2fd2b8dbdaff82605df7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 27 Nov 2023 14:51:05 -0500 Subject: cgroup/cpuset: Expose cpuset.cpus.isolated The root-only cpuset.cpus.isolated control file shows the current set of isolated CPUs in isolated partitions. This control file is currently exposed only with the cgroup_debug boot command line option which also adds the ".__DEBUG__." prefix. This is actually a useful control file if users want to find out which CPUs are currently in an isolated state by the cpuset controller. Remove CFTYPE_DEBUG flag for this control file and make it available by default without any prefix. The test_cpuset_prs.sh test script and the cgroup-v2.rst documentation file are also updated accordingly. Minor code change is also made in test_cpuset_prs.sh to avoid false test failure when running on debug kernel. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 7 +++++ kernel/cgroup/cpuset.c | 2 +- tools/testing/selftests/cgroup/test_cpuset_prs.sh | 32 +++++++++++++---------- 3 files changed, 26 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index cf5651a11df8..30f6ff2eba47 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -2316,6 +2316,13 @@ Cpuset Interface Files treated to have an implicit value of "cpuset.cpus" in the formation of local partition. + cpuset.cpus.isolated + A read-only and root cgroup only multiple values file. + + This file shows the set of all isolated CPUs used in existing + isolated partitions. It will be empty if no isolated partition + is created. + cpuset.cpus.partition A read-write single value file which exists on non-root cpuset-enabled cgroups. This flag is owned by the parent cgroup diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 1bad4007ff4b..2a16df86c55c 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3974,7 +3974,7 @@ static struct cftype dfl_files[] = { .name = "cpus.isolated", .seq_show = cpuset_common_seq_show, .private = FILE_ISOLATED_CPULIST, - .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG, + .flags = CFTYPE_ONLY_ON_ROOT, }, { } /* terminate */ diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 7b7c4c2b6d85..b5eb1be2248c 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -508,7 +508,7 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions - ISCPUS=$DIR/.__DEBUG__.cpuset.cpus.isolated + ISCPUS=$DIR/cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" @@ -593,17 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and *cpuset.cpus.isolated control file, +# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file, # if available, and compare that with the expected value. # # Note that isolated CPUs from the sched/domains context include offline # CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may -# not be included in the *cpuset.cpus.isolated control file which contains +# not be included in the cpuset.cpus.isolated control file which contains # only CPUs in isolated partitions. # # $1 - expected isolated cpu list(s) {,} # - expected sched/domains value -# - *cpuset.cpus.isolated value = if not defined +# - cpuset.cpus.isolated value = if not defined # check_isolcpus() { @@ -611,7 +611,7 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/.__DEBUG__.cpuset.cpus.isolated + ISCPUS=${CGROUP2}/cpuset.cpus.isolated if [[ $EXPECT_VAL = . ]] then EXPECT_VAL= @@ -692,14 +692,18 @@ test_fail() null_isolcpus_check() { [[ $VERBOSE -gt 0 ]] || return 0 - pause 0.02 - check_isolcpus "." - if [[ $? -ne 0 ]] - then - echo "Unexpected isolated CPUs: $ISOLCPUS" - dump_states - exit 1 - fi + # Retry a few times before printing error + RETRY=0 + while [[ $RETRY -lt 5 ]] + do + pause 0.01 + check_isolcpus "." + [[ $? -eq 0 ]] && return 0 + ((RETRY++)) + done + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 } # @@ -776,7 +780,7 @@ run_state_test() # NEWLIST=$(cat cpuset.cpus.effective) RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]] + while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times pause 0.01 -- cgit v1.2.3 From 72108c0b9c0e004d7dfc2fc88b02c30b12711325 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 31 Oct 2023 10:55:23 +0000 Subject: perf tools: Add --debug-file option to redirect debug output Currently, debug messages is output to stderr, add --debug-file option to support redirection to a specified file. Some test scenarios: # perf --list-opts --help --version --exec-path --html-path --paginate --no-pager --debugfs-dir --buildid-dir --list-cmds --list-opts --debug --debug-file # perf --debug-file No path given for --debug-file. Usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] # perf --debug-file /sys/perf.log record -v true Open debug file '/sys/perf.log' failed: Permission denied Usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] # perf --debug-file /tmp/perf.log record -v true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (26 samples) ] # cat /tmp/perf.log DEBUGINFOD_URLS= Using CPUID GenuineIntel-6-3E-4 nr_cblocks: 0 affinity: SYS mmap flush: 1 comp level: 0 mmap size 528384B Control descriptor is not initialized mmap size 528384B Looking at the vmlinux_path (8 entries long) Using /proc/kcore for kernel data Using /proc/kallsyms for symbols symbol:unmap_start file:(null) line:0 offset:0 return:0 lazy:(null) symbol:unmap_complete file:(null) line:0 offset:0 return:0 lazy:(null) symbol:map_start file:(null) line:0 offset:0 return:0 lazy:(null) symbol:map_complete file:(null) line:0 offset:0 return:0 lazy:(null) symbol:reloc_start file:(null) line:0 offset:0 return:0 lazy:(null) symbol:reloc_complete file:(null) line:0 offset:0 return:0 lazy:(null) symbol:init_start file:(null) line:0 offset:0 return:0 lazy:(null) symbol:init_complete file:(null) line:0 offset:0 return:0 lazy:(null) symbol:lll_lock_wait_private file:(null) line:0 offset:0 return:0 lazy:(null) symbol:lll_lock_wait file:(null) line:0 offset:0 return:0 lazy:(null) symbol:setjmp file:(null) line:0 offset:0 return:0 lazy:(null) symbol:longjmp file:(null) line:0 offset:0 return:0 lazy:(null) symbol:longjmp_target file:(null) line:0 offset:0 return:0 lazy:(null) failed to write feature HYBRID_TOPOLOGY Signed-off-by: Yang Jihong Acked-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231031105523.1472558-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 3 +++ tools/perf/perf.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index ba3df49c169d..a7cf7bc2f968 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -64,6 +64,9 @@ OPTIONS perf-event-open - Print perf_event_open() arguments and return value +--debug-file:: + Write debug output to a specified file. + DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/perf.c b/tools/perf/perf.c index d3fc8090413c..921bee0a6437 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -39,6 +39,7 @@ #include static int use_pager = -1; +static FILE *debug_fp = NULL; struct cmd_struct { const char *cmd; @@ -162,6 +163,19 @@ static void commit_pager_choice(void) } } +static int set_debug_file(const char *path) +{ + debug_fp = fopen(path, "w"); + if (!debug_fp) { + fprintf(stderr, "Open debug file '%s' failed: %s\n", + path, strerror(errno)); + return -1; + } + + debug_set_file(debug_fp); + return 0; +} + struct option options[] = { OPT_ARGUMENT("help", "help"), OPT_ARGUMENT("version", "version"), @@ -174,6 +188,7 @@ struct option options[] = { OPT_ARGUMENT("list-cmds", "list-cmds"), OPT_ARGUMENT("list-opts", "list-opts"), OPT_ARGUMENT("debug", "debug"), + OPT_ARGUMENT("debug-file", "debug-file"), OPT_END() }; @@ -287,6 +302,18 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argv)++; (*argc)--; + } else if (!strcmp(cmd, "--debug-file")) { + if (*argc < 2) { + fprintf(stderr, "No path given for --debug-file.\n"); + usage(perf_usage_string); + } + + if (set_debug_file((*argv)[1])) + usage(perf_usage_string); + + (*argv)++; + (*argc)--; + } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -547,5 +574,8 @@ int main(int argc, const char **argv) fprintf(stderr, "Failed to run command '%s': %s\n", cmd, str_error_r(errno, sbuf, sizeof(sbuf))); out: + if (debug_fp) + fclose(debug_fp); + return 1; } -- cgit v1.2.3 From d60469d7c0e5c4e8de10699377d2ba79004236a4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 9 Nov 2023 15:59:51 -0800 Subject: perf dwarf-aux: Add die_find_variable_by_addr() The die_find_variable_by_addr() is to find a variables in the given DIE using given (PC-relative) address. Global variables will have a location expression with DW_OP_addr which has an address so can simply compare it with the address. <1><143a7>: Abbrev Number: 2 (DW_TAG_variable) <143a8> DW_AT_name : loops_per_jiffy <143ac> DW_AT_type : <0x1cca> <143b0> DW_AT_external : 1 <143b0> DW_AT_decl_file : 193 <143b1> DW_AT_decl_line : 213 <143b2> DW_AT_location : 9 byte block: 3 b0 46 41 82 ff ff ff ff (DW_OP_addr: ffffffff824146b0) Note that the type-offset should be calculated from the base address of the global variable. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: linux-toolchains@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Link: https://lore.kernel.org/r/20231110000012.3538610-33-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 79 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dwarf-aux.h | 14 ++++++++ 2 files changed, 93 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 652e6e7368a2..edd9e407bc74 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1250,8 +1250,12 @@ out: struct find_var_data { /* Target instruction address */ Dwarf_Addr pc; + /* Target memory address (for global data) */ + Dwarf_Addr addr; /* Target register */ unsigned reg; + /* Access offset, set for global data */ + int offset; }; /* Max number of registers DW_OP_regN supports */ @@ -1312,6 +1316,81 @@ Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, }; return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); } + +/* Only checks direct child DIEs in the given scope */ +static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Word size; + Dwarf_Die type_die; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (ops->atom != DW_OP_addr) + continue; + + if (data->addr < ops->number) + continue; + + if (data->addr == ops->number) { + /* Update offset relative to the start of the variable */ + data->offset = 0; + return DIE_FIND_CB_END; + } + + if (die_get_real_type(die_mem, &type_die) == NULL) + continue; + + if (dwarf_aggregate_size(&type_die, &size) < 0) + continue; + + if (data->addr >= ops->number + size) + continue; + + /* Update offset relative to the start of the variable */ + data->offset = data->addr - ops->number; + return DIE_FIND_CB_END; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_addr - Find variable located at given address + * @sc_die: a scope DIE + * @pc: the program address to find + * @addr: the data address to find + * @die_mem: a buffer to save the resulting DIE + * @offset: the offset in the resulting type + * + * Find the variable DIE located at the given address (in PC-relative mode). + * This is usually for global variables. + */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset) +{ + struct find_var_data data = { + .pc = pc, + .addr = addr, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); + if (result) + *offset = data.offset; + return result; +} #endif /* diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b6f430730bd1..0ddf61fd3f8b 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -141,6 +141,11 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, Dwarf_Die *die_mem); +/* Find a (global) variable located in the 'addr' */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc, + Dwarf_Addr addr, Dwarf_Die *die_mem, + int *offset); + #else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, @@ -158,6 +163,15 @@ static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unus return NULL; } +static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, + Dwarf_Addr pc __maybe_unused, + Dwarf_Addr addr __maybe_unused, + Dwarf_Die *die_mem __maybe_unused, + int *offset __maybe_unused) +{ + return NULL; +} + #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ #endif /* _DWARF_AUX_H */ -- cgit v1.2.3 From 5940a20a186bd74efd6d0dc0b2b7c77d891895d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 2 Nov 2023 10:56:46 -0700 Subject: perf mmap: Lazily initialize zstd streams to save memory when not using it Zstd streams create dictionaries that can require significant RAM, especially when there is one per-CPU. Tools like 'perf record' won't use the streams without the -z option, and so the creation of the streams is pure overhead. Switch to creating the streams on first use. Committer notes: ssize_t comes from sys/types.h, size_t from stddef.h. This worked on glibc as stdlib.h includes both, but not on musl libc. So do what 'man size_t' says and include sys/types.h and stddef.h instead of stdlib.h Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231102175735.2272696-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 26 ++++++++++++++----- tools/perf/util/compress.h | 7 +++-- tools/perf/util/mmap.c | 5 ++-- tools/perf/util/mmap.h | 1 - tools/perf/util/zstd.c | 63 ++++++++++++++++++++++++--------------------- 5 files changed, 59 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8ec818568662..9b4f3805ca92 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -270,7 +270,7 @@ static int record__write(struct record *rec, struct mmap *map __maybe_unused, static int record__aio_enabled(struct record *rec); static int record__comp_enabled(struct record *rec); -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size); #ifdef HAVE_AIO_SUPPORT @@ -405,9 +405,13 @@ static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size */ if (record__comp_enabled(aio->rec)) { - size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, - mmap__mmap_len(map) - aio->size, - buf, size); + ssize_t compressed = zstd_compress(aio->rec->session, NULL, aio->data + aio->size, + mmap__mmap_len(map) - aio->size, + buf, size); + if (compressed < 0) + return (int)compressed; + + size = compressed; } else { memcpy(aio->data + aio->size, buf, size); } @@ -633,7 +637,13 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) struct record *rec = to; if (record__comp_enabled(rec)) { - size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size); + ssize_t compressed = zstd_compress(rec->session, map, map->data, + mmap__mmap_len(map), bf, size); + + if (compressed < 0) + return (int)compressed; + + size = compressed; bf = map->data; } @@ -1527,10 +1537,10 @@ static size_t process_comp_header(void *record, size_t increment) return size; } -static size_t zstd_compress(struct perf_session *session, struct mmap *map, +static ssize_t zstd_compress(struct perf_session *session, struct mmap *map, void *dst, size_t dst_size, void *src, size_t src_size) { - size_t compressed; + ssize_t compressed; size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1; struct zstd_data *zstd_data = &session->zstd_data; @@ -1539,6 +1549,8 @@ static size_t zstd_compress(struct perf_session *session, struct mmap *map, compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size, max_record_size, process_comp_header); + if (compressed < 0) + return compressed; if (map && map->file) { thread->bytes_transferred += src_size; diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 0cd3369af2a4..b29109cd3609 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -3,6 +3,8 @@ #define PERF_COMPRESS_H #include +#include +#include #ifdef HAVE_ZSTD_SUPPORT #include #endif @@ -21,6 +23,7 @@ struct zstd_data { #ifdef HAVE_ZSTD_SUPPORT ZSTD_CStream *cstream; ZSTD_DStream *dstream; + int comp_level; #endif }; @@ -29,7 +32,7 @@ struct zstd_data { int zstd_init(struct zstd_data *data, int level); int zstd_fini(struct zstd_data *data); -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)); @@ -48,7 +51,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused) } static inline -size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, void *dst __maybe_unused, size_t dst_size __maybe_unused, void *src __maybe_unused, size_t src_size __maybe_unused, size_t max_record_size __maybe_unused, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 49093b21ee2d..122ee198a86e 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -295,15 +295,14 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; - map->comp_level = mp->comp_level; #ifndef PYTHON_PERF - if (zstd_init(&map->zstd_data, map->comp_level)) { + if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } #endif - if (map->comp_level && !perf_mmap__aio_enabled(map)) { + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f944c3cd5efa..0df6e1621c7e 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,7 +39,6 @@ struct mmap { #endif struct mmap_cpu_mask affinity_mask; void *data; - int comp_level; struct perf_data_file *file; struct zstd_data zstd_data; }; diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c index 48dd2b018c47..57027e0ac7b6 100644 --- a/tools/perf/util/zstd.c +++ b/tools/perf/util/zstd.c @@ -7,35 +7,9 @@ int zstd_init(struct zstd_data *data, int level) { - size_t ret; - - data->dstream = ZSTD_createDStream(); - if (data->dstream == NULL) { - pr_err("Couldn't create decompression stream.\n"); - return -1; - } - - ret = ZSTD_initDStream(data->dstream); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - - if (!level) - return 0; - - data->cstream = ZSTD_createCStream(); - if (data->cstream == NULL) { - pr_err("Couldn't create compression stream.\n"); - return -1; - } - - ret = ZSTD_initCStream(data->cstream, level); - if (ZSTD_isError(ret)) { - pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret)); - return -1; - } - + data->comp_level = level; + data->dstream = NULL; + data->cstream = NULL; return 0; } @@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data) return 0; } -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)) { @@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t ZSTD_outBuffer output; void *record; + if (!data->cstream) { + data->cstream = ZSTD_createCStream(); + if (data->cstream == NULL) { + pr_err("Couldn't create compression stream.\n"); + return -1; + } + + ret = ZSTD_initCStream(data->cstream, data->comp_level); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize compression stream: %s\n", + ZSTD_getErrorName(ret)); + return -1; + } + } + while (input.pos < input.size) { record = dst; size = process_header(record, 0); @@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size ZSTD_inBuffer input = { src, src_size, 0 }; ZSTD_outBuffer output = { dst, dst_size, 0 }; + if (!data->dstream) { + data->dstream = ZSTD_createDStream(); + if (data->dstream == NULL) { + pr_err("Couldn't create decompression stream.\n"); + return 0; + } + + ret = ZSTD_initDStream(data->dstream); + if (ZSTD_isError(ret)) { + pr_err("Failed to initialize decompression stream: %s\n", + ZSTD_getErrorName(ret)); + return 0; + } + } while (input.pos < input.size) { ret = ZSTD_decompressStream(data->dstream, &output, &input); if (ZSTD_isError(ret)) { -- cgit v1.2.3 From 3b99d46a1170754a06f379a83be8101c5f6bfc46 Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 16:11:05 -0600 Subject: KVM: selftests: Actually print out magic token in NX hugepages skip message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass MAGIC_TOKEN to __TEST_REQUIRE() when printing the help message about needing to pass a magic value to manually run the NX hugepages test, otherwise the help message will contain garbage. In file included from x86_64/nx_huge_pages_test.c:15: x86_64/nx_huge_pages_test.c: In function ‘main’: include/test_util.h:40:32: error: format ‘%d’ expects a matching ‘int’ argument [-Werror=format=] 40 | ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \ | ^~~~ x86_64/nx_huge_pages_test.c:259:9: note: in expansion of macro ‘__TEST_REQUIRE’ 259 | __TEST_REQUIRE(token == MAGIC_TOKEN, | ^~~~~~~~~~~~~~ Signed-off-by: angquan yu Link: https://lore.kernel.org/r/20231128221105.63093-1-angquan21@gmail.com [sean: rewrite shortlog+changelog] Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 18ac5c1952a3..83e25bccc139 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -259,7 +259,7 @@ int main(int argc, char **argv) __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test."); + "also handles environment setup for the test.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); -- cgit v1.2.3 From 48f0dfd8d3e212ab27b6db147ed10407ff6aaa88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:25 +0100 Subject: libbpf: Add st_type argument to elf_resolve_syms_offsets function We need to get offsets for static variables in following changes, so making elf_resolve_syms_offsets to take st_type value as argument and passing it to elf_sym_iter_new. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231125193130.834322-2-jolsa@kernel.org --- tools/lib/bpf/elf.c | 5 +++-- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 3 ++- tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 2a62bf411bb3..b02faec748a5 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -407,7 +407,8 @@ static int symbol_cmp(const void *a, const void *b) * size, that needs to be released by the caller. */ int elf_resolve_syms_offsets(const char *binary_path, int cnt, - const char **syms, unsigned long **poffsets) + const char **syms, unsigned long **poffsets, + int st_type) { int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; int err = 0, i, cnt_done = 0; @@ -438,7 +439,7 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt, struct elf_sym_iter iter; struct elf_sym *sym; - err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC); + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type); if (err == -ENOENT) continue; if (err) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e067be95da3c..ea9b8158c20d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11447,7 +11447,7 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, return libbpf_err_ptr(err); offsets = resolved_offsets; } else if (syms) { - err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets); + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); if (err < 0) return libbpf_err_ptr(err); offsets = resolved_offsets; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f0f08635adb0..b5d334754e5d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -594,7 +594,8 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd); void elf_close(struct elf_fd *elf_fd); int elf_resolve_syms_offsets(const char *binary_path, int cnt, - const char **syms, unsigned long **poffsets); + const char **syms, unsigned long **poffsets, + int st_type); int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, unsigned long **poffsets, size_t *pcnt); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index cd051d3901a9..ece260cf2c0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -249,7 +249,7 @@ static void __test_link_api(struct child *child) int link_extra_fd = -1; int err; - err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets, STT_FUNC); if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) return; -- cgit v1.2.3 From e56fdbfb06e26a7066b070967badef4148528df2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:27 +0100 Subject: bpf: Add link_info support for uprobe multi link Adding support to get uprobe_link details through bpf_link_info interface. Adding new struct uprobe_multi to struct bpf_link_info to carry the uprobe_multi link details. The uprobe_multi.count is passed from user space to denote size of array fields (offsets/ref_ctr_offsets/cookies). The actual array size is stored back to uprobe_multi.count (allowing user to find out the actual array size) and array fields are populated up to the user passed size. All the non-array fields (path/count/flags/pid) are always set. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231125193130.834322-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 10 ++++++ kernel/trace/bpf_trace.c | 72 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 10 ++++++ 3 files changed, 92 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a5498242eaa..e88746ba7d21 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6562,6 +6562,16 @@ struct bpf_link_info { __u32 flags; __u64 missed; } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ __u32 :32; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ad0323f27288..c284a4ad0315 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3042,6 +3042,7 @@ struct bpf_uprobe_multi_link { struct path path; struct bpf_link link; u32 cnt; + u32 flags; struct bpf_uprobe *uprobes; struct task_struct *task; }; @@ -3083,9 +3084,79 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) kfree(umulti_link); } +static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets); + u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies); + u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets); + u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path); + u32 upath_size = info->uprobe_multi.path_size; + struct bpf_uprobe_multi_link *umulti_link; + u32 ucount = info->uprobe_multi.count; + int err = 0, i; + long left; + + if (!upath ^ !upath_size) + return -EINVAL; + + if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount) + return -EINVAL; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + info->uprobe_multi.count = umulti_link->cnt; + info->uprobe_multi.flags = umulti_link->flags; + info->uprobe_multi.pid = umulti_link->task ? + task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; + + if (upath) { + char *p, *buf; + + upath_size = min_t(u32, upath_size, PATH_MAX); + + buf = kmalloc(upath_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = d_path(&umulti_link->path, buf, upath_size); + if (IS_ERR(p)) { + kfree(buf); + return PTR_ERR(p); + } + upath_size = buf + upath_size - p; + left = copy_to_user(upath, p, upath_size); + kfree(buf); + if (left) + return -EFAULT; + info->uprobe_multi.path_size = upath_size; + } + + if (!uoffsets && !ucookies && !uref_ctr_offsets) + return 0; + + if (ucount < umulti_link->cnt) + err = -ENOSPC; + else + ucount = umulti_link->cnt; + + for (i = 0; i < ucount; i++) { + if (uoffsets && + put_user(umulti_link->uprobes[i].offset, uoffsets + i)) + return -EFAULT; + if (uref_ctr_offsets && + put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) + return -EFAULT; + if (ucookies && + put_user(umulti_link->uprobes[i].cookie, ucookies + i)) + return -EFAULT; + } + + return err; +} + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, .dealloc = bpf_uprobe_multi_link_dealloc, + .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; static int uprobe_prog_run(struct bpf_uprobe *uprobe, @@ -3274,6 +3345,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr link->uprobes = uprobes; link->path = path; link->task = task; + link->flags = flags; bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, &bpf_uprobe_multi_link_lops, prog); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7a5498242eaa..e88746ba7d21 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6562,6 +6562,16 @@ struct bpf_link_info { __u32 flags; __u64 missed; } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ __u32 :32; -- cgit v1.2.3 From 1703612885723869064f18e8816c6f3f87987748 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:28 +0100 Subject: selftests/bpf: Use bpf_link__destroy in fill_link_info tests The fill_link_info test keeps skeleton open and just creates various links. We are wrongly calling bpf_link__detach after each test to close them, we need to call bpf_link__destroy. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20231125193130.834322-5-jolsa@kernel.org --- .../selftests/bpf/prog_tests/fill_link_info.c | 44 +++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 97142a4db374..9294cb8d7743 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -140,14 +140,14 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, ); ssize_t entry_offset = 0; + struct bpf_link *link; int link_fd, err; - skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, - KPROBE_FUNC, &opts); - if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe")) return; - link_fd = bpf_link__fd(skel->links.kprobe_run); + link_fd = bpf_link__fd(link); if (!invalid) { /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) @@ -157,39 +157,41 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, } else { kprobe_fill_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kprobe_run); + bpf_link__destroy(link); } static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + struct bpf_link *link; int link_fd, err; - skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); - if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(link, "attach_tp")) return; - link_fd = bpf_link__fd(skel->links.tp_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.tp_run); + bpf_link__destroy(link); } static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + struct bpf_link *link; int link_fd, err; - skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); - if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + link = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; - link_fd = bpf_link__fd(skel->links.uprobe_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.uprobe_run); + bpf_link__destroy(link); } static int verify_kmulti_link_info(int fd, bool retprobe) @@ -278,24 +280,24 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bool retprobe, bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; - skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, - NULL, &opts); - if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) return; - link_fd = bpf_link__fd(skel->links.kmulti_run); + link_fd = bpf_link__fd(link); if (!invalid) { err = verify_kmulti_link_info(link_fd, retprobe); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kmulti_run); + bpf_link__destroy(link); } void test_fill_link_info(void) -- cgit v1.2.3 From 147c69307bcf67f1f01246f9acb794da9837f299 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:29 +0100 Subject: selftests/bpf: Add link_info test for uprobe_multi link Adding fill_link_info test for uprobe_multi link. Setting up uprobes with bogus ref_ctr_offsets and cookie values to test all the bpf_link_info::uprobe_multi fields. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231125193130.834322-6-jolsa@kernel.org --- .../selftests/bpf/prog_tests/fill_link_info.c | 198 +++++++++++++++++++++ .../selftests/bpf/progs/test_fill_link_info.c | 6 + 2 files changed, 204 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9294cb8d7743..d4b1901f7879 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -7,6 +7,7 @@ #include #include "trace_helpers.h" #include "test_fill_link_info.skel.h" +#include "bpf/libbpf_internal.h" #define TP_CAT "sched" #define TP_NAME "sched_switch" @@ -300,6 +301,196 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bpf_link__destroy(link); } +#define SEC(name) __attribute__((section(name), used)) + +static short uprobe_link_info_sema_1 SEC(".probes"); +static short uprobe_link_info_sema_2 SEC(".probes"); +static short uprobe_link_info_sema_3 SEC(".probes"); + +noinline void uprobe_link_info_func_1(void) +{ + asm volatile (""); + uprobe_link_info_sema_1++; +} + +noinline void uprobe_link_info_func_2(void) +{ + asm volatile (""); + uprobe_link_info_sema_2++; +} + +noinline void uprobe_link_info_func_3(void) +{ + asm volatile (""); + uprobe_link_info_sema_3++; +} + +static int +verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, + __u64 *cookies, __u64 *ref_ctr_offsets) +{ + char path[PATH_MAX], path_buf[PATH_MAX]; + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 ref_ctr_offsets_buf[3]; + __u64 offsets_buf[3]; + __u64 cookies_buf[3]; + int i, err, bit; + __u32 count = 0; + + memset(path, 0, sizeof(path)); + err = readlink("/proc/self/exe", path, sizeof(path)); + if (!ASSERT_NEQ(err, -1, "readlink")) + return -1; + + for (bit = 0; bit < 8; bit++) { + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + info.uprobe_multi.count = count; + + if (bit & 0x1) + info.uprobe_multi.offsets = ptr_to_u64(offsets_buf); + if (bit & 0x2) + info.uprobe_multi.cookies = ptr_to_u64(cookies_buf); + if (bit & 0x4) + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets_buf); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_UPROBE_MULTI, "info.type")) + return -1; + + ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid"); + ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); + ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN, + retprobe, "info.uprobe_multi.flags.retprobe"); + ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size"); + ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path"); + + for (i = 0; i < info.uprobe_multi.count; i++) { + if (info.uprobe_multi.offsets) + ASSERT_EQ(offsets_buf[i], offsets[i], "info.uprobe_multi.offsets"); + if (info.uprobe_multi.cookies) + ASSERT_EQ(cookies_buf[i], cookies[i], "info.uprobe_multi.cookies"); + if (info.uprobe_multi.ref_ctr_offsets) { + ASSERT_EQ(ref_ctr_offsets_buf[i], ref_ctr_offsets[i], + "info.uprobe_multi.ref_ctr_offsets"); + } + } + count = count ?: info.uprobe_multi.count; + } + + return 0; +} + +static void verify_umulti_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 buf[3]; + int err; + + /* upath_size defined, not path */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_upath_size"); + + /* path defined, but small */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(buf); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_LT(err, 0, "failed_upath_small"); + + /* path has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = PATH_MAX; + info.uprobe_multi.path = 123; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_bad_path_ptr"); + + /* count zero, with offsets */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_count"); + + /* offsets not big enough */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + info.uprobe_multi.count = 2; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -ENOSPC, "failed_small_count"); + + /* offsets has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = 123; + info.uprobe_multi.count = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_wrong_offsets"); +} + +static void test_uprobe_multi_fill_link_info(struct test_fill_link_info *skel, + bool retprobe, bool invalid) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .retprobe = retprobe, + ); + const char *syms[3] = { + "uprobe_link_info_func_1", + "uprobe_link_info_func_2", + "uprobe_link_info_func_3", + }; + __u64 cookies[3] = { + 0xdead, + 0xbeef, + 0xcafe, + }; + const char *sema[3] = { + "uprobe_link_info_sema_1", + "uprobe_link_info_sema_2", + "uprobe_link_info_sema_3", + }; + __u64 *offsets = NULL, *ref_ctr_offsets; + struct bpf_link *link; + int link_fd, err; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, sema, + (unsigned long **) &ref_ctr_offsets, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, syms, + (unsigned long **) &offsets, STT_FUNC); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func")) + goto out; + + opts.syms = syms; + opts.cookies = &cookies[0]; + opts.ref_ctr_offsets = (unsigned long *) &ref_ctr_offsets[0]; + opts.cnt = ARRAY_SIZE(syms); + + link = bpf_program__attach_uprobe_multi(skel->progs.umulti_run, 0, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto out; + + link_fd = bpf_link__fd(link); + if (invalid) + verify_umulti_invalid_user_buffer(link_fd); + else + verify_umulti_link_info(link_fd, retprobe, offsets, cookies, ref_ctr_offsets); + + bpf_link__destroy(link); +out: + free(ref_ctr_offsets); + free(offsets); +} + void test_fill_link_info(void) { struct test_fill_link_info *skel; @@ -339,6 +530,13 @@ void test_fill_link_info(void) if (test__start_subtest("kprobe_multi_invalid_ubuff")) test_kprobe_multi_fill_link_info(skel, true, true); + if (test__start_subtest("uprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, false, false); + if (test__start_subtest("uretprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("uprobe_multi_invalid")) + test_uprobe_multi_fill_link_info(skel, false, true); + cleanup: test_fill_link_info__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 564f402d56fe..69509f8bb680 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -39,4 +39,10 @@ int BPF_PROG(kmulti_run) return 0; } +SEC("uprobe.multi") +int BPF_PROG(umulti_run) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a7795698f8b6c48283fa4334eb313bc1350b2864 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:30 +0100 Subject: bpftool: Add support to display uprobe_multi links Adding support to display details for uprobe_multi links, both plain: # bpftool link -p ... 24: uprobe_multi prog 126 uprobe.multi path /home/jolsa/bpf/test_progs func_cnt 3 pid 4143 offset ref_ctr_offset cookies 0xd1f88 0xf5d5a8 0xdead 0xd1f8f 0xf5d5aa 0xbeef 0xd1f96 0xf5d5ac 0xcafe and json: # bpftool link -p [{ ... },{ "id": 24, "type": "uprobe_multi", "prog_id": 126, "retprobe": false, "path": "/home/jolsa/bpf/test_progs", "func_cnt": 3, "pid": 4143, "funcs": [{ "offset": 860040, "ref_ctr_offset": 16111016, "cookie": 57005 },{ "offset": 860047, "ref_ctr_offset": 16111018, "cookie": 48879 },{ "offset": 860054, "ref_ctr_offset": 16111020, "cookie": 51966 } ] } ] Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231125193130.834322-7-jolsa@kernel.org --- tools/bpf/bpftool/link.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a1528cde81ab..cb46667a6b2e 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -294,6 +294,37 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_end_array(json_wtr); } +static __u64 *u64_to_arr(__u64 val) +{ + return (__u64 *) u64_to_ptr(val); +} + +static void +show_uprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + __u32 i; + + jsonw_bool_field(json_wtr, "retprobe", + info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN); + jsonw_string_field(json_wtr, "path", (char *) u64_to_ptr(info->uprobe_multi.path)); + jsonw_uint_field(json_wtr, "func_cnt", info->uprobe_multi.count); + jsonw_int_field(json_wtr, "pid", (int) info->uprobe_multi.pid); + jsonw_name(json_wtr, "funcs"); + jsonw_start_array(json_wtr); + + for (i = 0; i < info->uprobe_multi.count; i++) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "offset", + u64_to_arr(info->uprobe_multi.offsets)[i]); + jsonw_uint_field(json_wtr, "ref_ctr_offset", + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i]); + jsonw_uint_field(json_wtr, "cookie", + u64_to_arr(info->uprobe_multi.cookies)[i]); + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + static void show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) { @@ -465,6 +496,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_json(info, json_wtr); break; + case BPF_LINK_TYPE_UPROBE_MULTI: + show_uprobe_multi_json(info, json_wtr); + break; case BPF_LINK_TYPE_PERF_EVENT: switch (info->perf_event.type) { case BPF_PERF_EVENT_EVENT: @@ -674,6 +708,33 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) } } +static void show_uprobe_multi_plain(struct bpf_link_info *info) +{ + __u32 i; + + if (!info->uprobe_multi.count) + return; + + if (info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN) + printf("\n\turetprobe.multi "); + else + printf("\n\tuprobe.multi "); + + printf("path %s ", (char *) u64_to_ptr(info->uprobe_multi.path)); + printf("func_cnt %u ", info->uprobe_multi.count); + + if (info->uprobe_multi.pid) + printf("pid %d ", info->uprobe_multi.pid); + + printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies"); + for (i = 0; i < info->uprobe_multi.count; i++) { + printf("\n\t0x%-16llx 0x%-16llx 0x%-16llx", + u64_to_arr(info->uprobe_multi.offsets)[i], + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i], + u64_to_arr(info->uprobe_multi.cookies)[i]); + } +} + static void show_perf_event_kprobe_plain(struct bpf_link_info *info) { const char *buf; @@ -807,6 +868,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_plain(info); break; + case BPF_LINK_TYPE_UPROBE_MULTI: + show_uprobe_multi_plain(info); + break; case BPF_LINK_TYPE_PERF_EVENT: switch (info->perf_event.type) { case BPF_PERF_EVENT_EVENT: @@ -846,8 +910,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) static int do_show_link(int fd) { + __u64 *ref_ctr_offsets = NULL, *offsets = NULL, *cookies = NULL; struct bpf_link_info info; __u32 len = sizeof(info); + char path_buf[PATH_MAX]; __u64 *addrs = NULL; char buf[PATH_MAX]; int count; @@ -889,6 +955,39 @@ again: goto again; } } + if (info.type == BPF_LINK_TYPE_UPROBE_MULTI && + !info.uprobe_multi.offsets) { + count = info.uprobe_multi.count; + if (count) { + offsets = calloc(count, sizeof(__u64)); + if (!offsets) { + p_err("mem alloc failed"); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.offsets = ptr_to_u64(offsets); + ref_ctr_offsets = calloc(count, sizeof(__u64)); + if (!ref_ctr_offsets) { + p_err("mem alloc failed"); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(cookies); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.cookies = ptr_to_u64(cookies); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + goto again; + } + } if (info.type == BPF_LINK_TYPE_PERF_EVENT) { switch (info.perf_event.type) { case BPF_PERF_EVENT_TRACEPOINT: @@ -924,8 +1023,10 @@ again: else show_link_close_plain(fd, &info); - if (addrs) - free(addrs); + free(ref_ctr_offsets); + free(cookies); + free(offsets); + free(addrs); close(fd); return 0; } -- cgit v1.2.3 From 2ce344b6891618063c0bebe22d9cdf9c086e0aa8 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:50 +0900 Subject: selftests/bpf: Choose pkg-config for the target pkg-config is used to build sign-file executable. It should use the library for the target instead of the host as it is called during tests. Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-2-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9c27b67bc7b1..94825ef813d5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -18,7 +18,7 @@ else GENDIR := $(abspath ../../../../include/generated) endif GENHDR := $(GENDIR)/autoconf.h -HOSTPKG_CONFIG := pkg-config +PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR @@ -219,9 +219,9 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) - $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \ + $(Q)$(CC) $(shell $(PKG_CONFIG) --cflags libcrypto 2> /dev/null) \ $< -o $@ \ - $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) + $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) -- cgit v1.2.3 From 18f6f9de98d1d0f7040e6e6c39fce8939f55520f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:51 +0900 Subject: selftests/bpf: Override PKG_CONFIG for static builds A library may need to depend on additional archive files for static builds so pkg-config should be instructed to list them. Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-3-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index cb9b95702ac6..9af79c7a9b58 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -77,7 +77,7 @@ In case of linker errors when running selftests, try using static linking: .. code-block:: console - $ LDLIBS=-static vmtest.sh + $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh .. note:: Some distros may not support static linking. -- cgit v1.2.3 From 8998a479fd96b0b209dcb2feb468eba7eddb4ddb Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:52 +0900 Subject: selftests/bpf: Use pkg-config for libelf When linking statically, libraries may require other dependencies to be included to ld flags. In particular, libelf may require libzstd. Use pkg-config to determine such dependencies. Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-4-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 94825ef813d5..617ae55c3bb5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -29,13 +29,17 @@ SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) RELEASE ?= OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) + +LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror \ - $(GENFLAGS) $(SAN_CFLAGS) \ + $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) -LDLIBS += -lelf -lz -lrt -lpthread +LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread ifneq ($(LLVM),) # Silence some warnings when compiled with clang -- cgit v1.2.3 From 83f2df9d66bc9e1e0dbd5d5586a701088f6a1d42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 14:58:58 -0800 Subject: tools: ynl-gen: always construct struct ynl_req_state struct ynl_req_state carries reply-related info from generated code into generic YNL code. While we don't need reply info to execute a request without a reply, we still need to pass in the struct, because it's also where we get the pointer to struct ynl_sock from. Passing NULL results in crashes if kernel returns an error or an unexpected reply. Fixes: dc0956c98f11 ("tools: ynl-gen: move the response reading logic into YNL") Link: https://lore.kernel.org/r/20231126225858.2144136-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/devlink-user.c | 87 +++++++++++++++++++++----------- tools/net/ynl/generated/ethtool-user.c | 51 ++++++++++++------- tools/net/ynl/generated/fou-user.c | 6 ++- tools/net/ynl/generated/handshake-user.c | 3 +- tools/net/ynl/ynl-gen-c.py | 10 ++-- 5 files changed, 102 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index c12ca87ca2bb..8e757e249dab 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -2399,6 +2399,7 @@ void devlink_port_set_req_free(struct devlink_port_set_req *req) int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2416,7 +2417,7 @@ int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) if (req->_present.port_function) devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2537,6 +2538,7 @@ void devlink_port_del_req_free(struct devlink_port_del_req *req) int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2550,7 +2552,7 @@ int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2568,6 +2570,7 @@ void devlink_port_split_req_free(struct devlink_port_split_req *req) int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2583,7 +2586,7 @@ int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) if (req->_present.port_split_count) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2602,6 +2605,7 @@ void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req) int devlink_port_unsplit(struct ynl_sock *ys, struct devlink_port_unsplit_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2615,7 +2619,7 @@ int devlink_port_unsplit(struct ynl_sock *ys, if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2926,6 +2930,7 @@ void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req) int devlink_sb_pool_set(struct ynl_sock *ys, struct devlink_sb_pool_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2945,7 +2950,7 @@ int devlink_sb_pool_set(struct ynl_sock *ys, if (req->_present.sb_pool_size) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3126,6 +3131,7 @@ devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req) int devlink_sb_port_pool_set(struct ynl_sock *ys, struct devlink_sb_port_pool_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3145,7 +3151,7 @@ int devlink_sb_port_pool_set(struct ynl_sock *ys, if (req->_present.sb_threshold) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3334,6 +3340,7 @@ devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, struct devlink_sb_tc_pool_bind_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3357,7 +3364,7 @@ int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, if (req->_present.sb_threshold) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3376,6 +3383,7 @@ void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req) int devlink_sb_occ_snapshot(struct ynl_sock *ys, struct devlink_sb_occ_snapshot_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3389,7 +3397,7 @@ int devlink_sb_occ_snapshot(struct ynl_sock *ys, if (req->_present.sb_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3409,6 +3417,7 @@ devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req) int devlink_sb_occ_max_clear(struct ynl_sock *ys, struct devlink_sb_occ_max_clear_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3422,7 +3431,7 @@ int devlink_sb_occ_max_clear(struct ynl_sock *ys, if (req->_present.sb_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3544,6 +3553,7 @@ void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req) int devlink_eswitch_set(struct ynl_sock *ys, struct devlink_eswitch_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3561,7 +3571,7 @@ int devlink_eswitch_set(struct ynl_sock *ys, if (req->_present.eswitch_encap_mode) mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3895,6 +3905,7 @@ devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_se int devlink_dpipe_table_counters_set(struct ynl_sock *ys, struct devlink_dpipe_table_counters_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3910,7 +3921,7 @@ int devlink_dpipe_table_counters_set(struct ynl_sock *ys, if (req->_present.dpipe_table_counters_enabled) mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3929,6 +3940,7 @@ void devlink_resource_set_req_free(struct devlink_resource_set_req *req) int devlink_resource_set(struct ynl_sock *ys, struct devlink_resource_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3944,7 +3956,7 @@ int devlink_resource_set(struct ynl_sock *ys, if (req->_present.resource_size) mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4319,6 +4331,7 @@ void devlink_param_set_req_free(struct devlink_param_set_req *req) int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4336,7 +4349,7 @@ int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) if (req->_present.param_value_cmode) mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4631,6 +4644,7 @@ void devlink_region_del_req_free(struct devlink_region_del_req *req) int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4648,7 +4662,7 @@ int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) if (req->_present.region_snapshot_id) mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4922,6 +4936,7 @@ void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req) int devlink_port_param_set(struct ynl_sock *ys, struct devlink_port_param_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4935,7 +4950,7 @@ int devlink_port_param_set(struct ynl_sock *ys, if (req->_present.port_index) mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5360,6 +5375,7 @@ devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req int devlink_health_reporter_set(struct ynl_sock *ys, struct devlink_health_reporter_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5381,7 +5397,7 @@ int devlink_health_reporter_set(struct ynl_sock *ys, if (req->_present.health_reporter_auto_dump) mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5402,6 +5418,7 @@ devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_ int devlink_health_reporter_recover(struct ynl_sock *ys, struct devlink_health_reporter_recover_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5417,7 +5434,7 @@ int devlink_health_reporter_recover(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5438,6 +5455,7 @@ devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnos int devlink_health_reporter_diagnose(struct ynl_sock *ys, struct devlink_health_reporter_diagnose_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5453,7 +5471,7 @@ int devlink_health_reporter_diagnose(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5556,6 +5574,7 @@ devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_ int devlink_health_reporter_dump_clear(struct ynl_sock *ys, struct devlink_health_reporter_dump_clear_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5571,7 +5590,7 @@ int devlink_health_reporter_dump_clear(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5592,6 +5611,7 @@ void devlink_flash_update_req_free(struct devlink_flash_update_req *req) int devlink_flash_update(struct ynl_sock *ys, struct devlink_flash_update_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5609,7 +5629,7 @@ int devlink_flash_update(struct ynl_sock *ys, if (req->_present.flash_update_overwrite_mask) mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5780,6 +5800,7 @@ void devlink_trap_set_req_free(struct devlink_trap_set_req *req) int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5795,7 +5816,7 @@ int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) if (req->_present.trap_action) mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5968,6 +5989,7 @@ void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req) int devlink_trap_group_set(struct ynl_sock *ys, struct devlink_trap_group_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5985,7 +6007,7 @@ int devlink_trap_group_set(struct ynl_sock *ys, if (req->_present.trap_policer_id) mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6152,6 +6174,7 @@ devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req) int devlink_trap_policer_set(struct ynl_sock *ys, struct devlink_trap_policer_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6169,7 +6192,7 @@ int devlink_trap_policer_set(struct ynl_sock *ys, if (req->_present.trap_policer_burst) mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6190,6 +6213,7 @@ devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *r int devlink_health_reporter_test(struct ynl_sock *ys, struct devlink_health_reporter_test_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6205,7 +6229,7 @@ int devlink_health_reporter_test(struct ynl_sock *ys, if (req->_present.health_reporter_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6384,6 +6408,7 @@ void devlink_rate_set_req_free(struct devlink_rate_set_req *req) int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6407,7 +6432,7 @@ int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) if (req->_present.rate_parent_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6427,6 +6452,7 @@ void devlink_rate_new_req_free(struct devlink_rate_new_req *req) int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6450,7 +6476,7 @@ int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) if (req->_present.rate_parent_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6469,6 +6495,7 @@ void devlink_rate_del_req_free(struct devlink_rate_del_req *req) int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6482,7 +6509,7 @@ int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) if (req->_present.rate_node_name_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6645,6 +6672,7 @@ void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req) int devlink_linecard_set(struct ynl_sock *ys, struct devlink_linecard_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6660,7 +6688,7 @@ int devlink_linecard_set(struct ynl_sock *ys, if (req->_present.linecard_type_len) mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6810,6 +6838,7 @@ void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req) int devlink_selftests_run(struct ynl_sock *ys, struct devlink_selftests_run_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6823,7 +6852,7 @@ int devlink_selftests_run(struct ynl_sock *ys, if (req->_present.selftests) devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c index 74b883a14958..660435639e2b 100644 --- a/tools/net/ynl/generated/ethtool-user.c +++ b/tools/net/ynl/generated/ethtool-user.c @@ -1843,6 +1843,7 @@ void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req) int ethtool_linkinfo_set(struct ynl_sock *ys, struct ethtool_linkinfo_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -1862,7 +1863,7 @@ int ethtool_linkinfo_set(struct ynl_sock *ys, if (req->_present.transceiver) mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2067,6 +2068,7 @@ void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req) int ethtool_linkmodes_set(struct ynl_sock *ys, struct ethtool_linkmodes_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2094,7 +2096,7 @@ int ethtool_linkmodes_set(struct ynl_sock *ys, if (req->_present.rate_matching) mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2398,6 +2400,7 @@ void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req) int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2409,7 +2412,7 @@ int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req) if (req->_present.msgmask) ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -2577,6 +2580,7 @@ void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req) int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -2590,7 +2594,7 @@ int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) if (req->_present.sopass_len) mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3045,6 +3049,7 @@ void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req) int ethtool_privflags_set(struct ynl_sock *ys, struct ethtool_privflags_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3056,7 +3061,7 @@ int ethtool_privflags_set(struct ynl_sock *ys, if (req->_present.flags) ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3273,6 +3278,7 @@ void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req) int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3312,7 +3318,7 @@ int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) if (req->_present.tx_push_buf_len_max) mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3495,6 +3501,7 @@ void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req) int ethtool_channels_set(struct ynl_sock *ys, struct ethtool_channels_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3520,7 +3527,7 @@ int ethtool_channels_set(struct ynl_sock *ys, if (req->_present.combined_count) mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -3798,6 +3805,7 @@ void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req) int ethtool_coalesce_set(struct ynl_sock *ys, struct ethtool_coalesce_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -3861,7 +3869,7 @@ int ethtool_coalesce_set(struct ynl_sock *ys, if (req->_present.tx_aggr_time_usecs) mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4036,6 +4044,7 @@ void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req) int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4055,7 +4064,7 @@ int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) if (req->_present.stats_src) mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4242,6 +4251,7 @@ void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req) int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4263,7 +4273,7 @@ int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) if (req->_present.tx_lpi_timer) mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4437,6 +4447,7 @@ void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req) int ethtool_cable_test_act(struct ynl_sock *ys, struct ethtool_cable_test_act_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4446,7 +4457,7 @@ int ethtool_cable_test_act(struct ynl_sock *ys, if (req->_present.header) ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4465,6 +4476,7 @@ ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req) int ethtool_cable_test_tdr_act(struct ynl_sock *ys, struct ethtool_cable_test_tdr_act_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4474,7 +4486,7 @@ int ethtool_cable_test_tdr_act(struct ynl_sock *ys, if (req->_present.header) ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -4782,6 +4794,7 @@ void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req) int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -4799,7 +4812,7 @@ int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) if (req->_present.stats) ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5235,6 +5248,7 @@ void ethtool_module_set_req_free(struct ethtool_module_set_req *req) int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5248,7 +5262,7 @@ int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) if (req->_present.power_mode) mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5397,6 +5411,7 @@ void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req) int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5412,7 +5427,7 @@ int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) if (req->_present.pw_d_status) mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -5746,6 +5761,7 @@ void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req) int ethtool_plca_set_cfg(struct ynl_sock *ys, struct ethtool_plca_set_cfg_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -5771,7 +5787,7 @@ int ethtool_plca_set_cfg(struct ynl_sock *ys, if (req->_present.burst_tmr) mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -6124,6 +6140,7 @@ void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req) int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -6143,7 +6160,7 @@ int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) if (req->_present.tx_min_frag_size) mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c index 4271b5d43c58..f30bef23bc31 100644 --- a/tools/net/ynl/generated/fou-user.c +++ b/tools/net/ynl/generated/fou-user.c @@ -72,6 +72,7 @@ void fou_add_req_free(struct fou_add_req *req) int fou_add(struct ynl_sock *ys, struct fou_add_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -99,7 +100,7 @@ int fou_add(struct ynl_sock *ys, struct fou_add_req *req) if (req->_present.ifindex) mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; @@ -117,6 +118,7 @@ void fou_del_req_free(struct fou_del_req *req) int fou_del(struct ynl_sock *ys, struct fou_del_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -140,7 +142,7 @@ int fou_del(struct ynl_sock *ys, struct fou_del_req *req) if (req->_present.peer_v6_len) mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c index 7c67765daf90..6901f8462cca 100644 --- a/tools/net/ynl/generated/handshake-user.c +++ b/tools/net/ynl/generated/handshake-user.c @@ -295,6 +295,7 @@ void handshake_done_req_free(struct handshake_done_req *req) int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) { + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; struct nlmsghdr *nlh; int err; @@ -308,7 +309,7 @@ int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) for (unsigned int i = 0; i < req->n_remote_auth; i++) mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]); - err = ynl_exec(ys, nlh, NULL); + err = ynl_exec(ys, nlh, &yrs); if (err < 0) return -1; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 3bd6b928c14f..8337aa6de25e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1709,14 +1709,14 @@ def print_req(ri): ret_ok = '0' ret_err = '-1' direction = "request" - local_vars = ['struct nlmsghdr *nlh;', + local_vars = ['struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };', + 'struct nlmsghdr *nlh;', 'int err;'] if 'reply' in ri.op[ri.op_mode]: ret_ok = 'rsp' ret_err = 'NULL' - local_vars += [f'{type_name(ri, rdir(direction))} *rsp;', - 'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };'] + local_vars += [f'{type_name(ri, rdir(direction))} *rsp;'] print_prototype(ri, direction, terminate=False) ri.cw.block_start() @@ -1732,7 +1732,6 @@ def print_req(ri): attr.attr_put(ri, "req") ri.cw.nl() - parse_arg = "NULL" if 'reply' in ri.op[ri.op_mode]: ri.cw.p('rsp = calloc(1, sizeof(*rsp));') ri.cw.p('yrs.yarg.data = rsp;') @@ -1742,8 +1741,7 @@ def print_req(ri): else: ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};') ri.cw.nl() - parse_arg = '&yrs' - ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});") + ri.cw.p("err = ynl_exec(ys, nlh, &yrs);") ri.cw.p('if (err < 0)') if 'reply' in ri.op[ri.op_mode]: ri.cw.p('goto err_free;') -- cgit v1.2.3 From a472ee42e6f60c8714e2306385687922afcba8c4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Nov 2023 12:47:17 -0300 Subject: perf test sigtrap: Generalize the BTF routine to reuse it in this test Move the part that loads the BTF info to a "btf__available()" that will lazy load the BTF info so that if we need it for some other test, which we will in the following cset, we can reuse it. At some point this will move from this specific 'perf test' entry to be used in other parts of perf, do it when needed. Cc: Adrian Hunter Cc: Clark Williams Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kate Carcia Cc: Namhyung Kim Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20231129154718.326330-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sigtrap.c | 60 ++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index 1de7478ec189..a1bc7c776254 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -57,36 +57,51 @@ static struct perf_event_attr make_event_attr(void) #ifdef HAVE_BPF_SKEL #include -static bool attr_has_sigtrap(void) +static struct btf *btf; + +static bool btf__available(void) { - bool ret = false; - struct btf *btf; - const struct btf_type *t; + if (btf == NULL) + btf = btf__load_vmlinux_btf(); + + return btf != NULL; +} + +static void btf__exit(void) +{ + btf__free(btf); + btf = NULL; +} + +static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name) +{ + const struct btf_type *t = btf__type_by_id(btf, type_id); const struct btf_member *m; - const char *name; - int i, id; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + const char *current_member_name = btf__name_by_offset(btf, m->name_off); + if (!strcmp(current_member_name, member_name)) + return m; + } + + return NULL; +} + +static bool attr_has_sigtrap(void) +{ + int id; - btf = btf__load_vmlinux_btf(); - if (btf == NULL) { + if (!btf__available()) { /* should be an old kernel */ return false; } id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT); if (id < 0) - goto out; + return false; - t = btf__type_by_id(btf, id); - for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { - name = btf__name_by_offset(btf, m->name_off); - if (!strcmp(name, "sigtrap")) { - ret = true; - break; - } - } -out: - btf__free(btf); - return ret; + return __btf_type__find_member_by_name(id, "sigtrap") != NULL; } #else /* !HAVE_BPF_SKEL */ static bool attr_has_sigtrap(void) @@ -109,6 +124,10 @@ static bool attr_has_sigtrap(void) return ret; } + +static void btf__exit(void) +{ +} #endif /* HAVE_BPF_SKEL */ static void @@ -221,6 +240,7 @@ out_restore_sigaction: sigaction(SIGTRAP, &oldact, NULL); out: pthread_barrier_destroy(&barrier); + btf__exit(); return ret; } -- cgit v1.2.3 From 650e0bde43f35bb675e87e30f679a57cfa22e0e5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Nov 2023 12:47:18 -0300 Subject: perf tests sigtrap: Skip if running on a kernel with sleepable spinlocks There are issues as reported that need some more investigation on the RT kernel front, till that is addressed, skip this test. This test is already skipped for multiple hardware architectures where the tested kernel feature is not supported. Acked-by: Marco Elver Cc: Adrian Hunter Cc: Clark Williams Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juri Lelli Cc: Kate Carcia Cc: Marco Elver Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/ Link: https://lore.kernel.org/r/20231129154718.326330-3-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sigtrap.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c index a1bc7c776254..e6fd934b027a 100644 --- a/tools/perf/tests/sigtrap.c +++ b/tools/perf/tests/sigtrap.c @@ -103,6 +103,34 @@ static bool attr_has_sigtrap(void) return __btf_type__find_member_by_name(id, "sigtrap") != NULL; } + +static bool kernel_with_sleepable_spinlocks(void) +{ + const struct btf_member *member; + const struct btf_type *type; + const char *type_name; + int id; + + if (!btf__available()) + return false; + + id = btf__find_by_name_kind(btf, "spinlock", BTF_KIND_STRUCT); + if (id < 0) + return false; + + // Only RT has a "lock" member for "struct spinlock" + member = __btf_type__find_member_by_name(id, "lock"); + if (member == NULL) + return false; + + // But check its type as well + type = btf__type_by_id(btf, member->type); + if (!type || !btf_is_struct(type)) + return false; + + type_name = btf__name_by_offset(btf, type->name_off); + return type_name && !strcmp(type_name, "rt_mutex_base"); +} #else /* !HAVE_BPF_SKEL */ static bool attr_has_sigtrap(void) { @@ -125,6 +153,11 @@ static bool attr_has_sigtrap(void) return ret; } +static bool kernel_with_sleepable_spinlocks(void) +{ + return false; +} + static void btf__exit(void) { } @@ -166,7 +199,7 @@ static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) { - int ret; + int ret, expected_sigtraps; ctx.iterate_on = 3000; @@ -175,7 +208,16 @@ static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrie ret = run_test_threads(threads, barrier); TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); - TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + expected_sigtraps = NUM_THREADS * ctx.iterate_on; + + if (ctx.signal_count < expected_sigtraps && kernel_with_sleepable_spinlocks()) { + pr_debug("Expected %d sigtraps, got %d, running on a kernel with sleepable spinlocks.\n", + expected_sigtraps, ctx.signal_count); + pr_debug("See https://lore.kernel.org/all/e368f2c848d77fbc8d259f44e2055fe469c219cf.camel@gmx.de/\n"); + return TEST_SKIP; + } else + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, expected_sigtraps); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); #if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ -- cgit v1.2.3 From 72a2a0a494ec9aefbca4ad64f46b8e3370809993 Mon Sep 17 00:00:00 2001 From: Likhitha Korrapati Date: Sun, 26 Nov 2023 02:09:14 -0500 Subject: perf test record+probe_libc_inet_pton: Fix call chain match on powerpc The perf test "probe libc's inet_pton & backtrace it with ping" fails on powerpc as below: # perf test -v "probe libc's inet_pton & backtrace it with ping" 85: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 96028 ping 96056 [002] 127271.101961: probe_libc:inet_pton: (7fffa1779a60) 7fffa1779a60 __GI___inet_pton+0x0 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 7fffa172a73c getaddrinfo+0x121c (/usr/lib64/glibc-hwcaps/power10/libc.so.6) FAIL: expected backtrace entry "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\(/usr/lib64/glibc-hwcaps/power10/libc.so.6\)$" got "7fffa172a73c getaddrinfo+0x121c (/usr/lib64/glibc-hwcaps/power10/libc.so.6)" test child finished with -1 ---- end ---- probe libc's inet_pton & backtrace it with ping: FAILED! This test installs a probe on libc's inet_pton function, which will use uprobes and then uses perf trace on a ping to localhost. It gets 3 levels deep backtrace and checks whether it is what we expected or not. The test started failing from RHEL 9.4 where as it works in previous distro version (RHEL 9.2). Test expects gaih_inet function to be part of backtrace. But in the glibc version (2.34-86) which is part of distro where it fails, this function is missing and hence the test is failing. From nm and ping command output we can confirm that gaih_inet function is not present in the expected backtrace for glibc version glibc-2.34-86 [root@xxx perf]# nm /usr/lib64/glibc-hwcaps/power10/libc.so.6 | grep gaih_inet 00000000001273e0 t gaih_inet_serv 00000000001cd8d8 r gaih_inet_typeproto [root@xxx perf]# perf script -i /tmp/perf.data.6E8 ping 104048 [000] 128582.508976: probe_libc:inet_pton: (7fff83779a60) 7fff83779a60 __GI___inet_pton+0x0 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 7fff8372a73c getaddrinfo+0x121c (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 11dc73534 [unknown] (/usr/bin/ping) 7fff8362a8c4 __libc_start_call_main+0x84 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) FAIL: expected backtrace entry "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\(/usr/lib64/glibc-hwcaps/power10/libc.so.6\)$" got "7fff9d52a73c getaddrinfo+0x121c (/usr/lib64/glibc-hwcaps/power10/libc.so.6)" With version glibc-2.34-60 gaih_inet function is present as part of the expected backtrace. So we cannot just remove the gaih_inet function from the backtrace. [root@xxx perf]# nm /usr/lib64/glibc-hwcaps/power10/libc.so.6 | grep gaih_inet 0000000000130490 t gaih_inet.constprop.0 000000000012e830 t gaih_inet_serv 00000000001d45e4 r gaih_inet_typeproto [root@xxx perf]# ./perf script -i /tmp/perf.data.b6S ping 67906 [000] 22699.591699: probe_libc:inet_pton_3: (7fffbdd80820) 7fffbdd80820 __GI___inet_pton+0x0 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 7fffbdd31160 gaih_inet.constprop.0+0xcd0 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 7fffbdd31c7c getaddrinfo+0x14c (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 1140d3558 [unknown] (/usr/bin/ping) This patch solves this issue by doing a conditional skip. If there is a gaih_inet function present in the libc then it will be added to the expected backtrace else the function will be skipped from being added to the expected backtrace. Output with the patch [root@xxx perf]# ./perf test -v "probe libc's inet_pton & backtrace it with ping" 83: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 102662 ping 102692 [000] 127935.549973: probe_libc:inet_pton: (7fff93379a60) 7fff93379a60 __GI___inet_pton+0x0 (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 7fff9332a73c getaddrinfo+0x121c (/usr/lib64/glibc-hwcaps/power10/libc.so.6) 11ef03534 [unknown] (/usr/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok Reported-by: Disha Goel Reviewed-by: Athira Jajeev Reviewed-by: Ian Rogers Signed-off-by: Likhitha Korrapati Tested-by: Disha Goel Cc: Adrian Hunter Cc: Disha Goel Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20231126070914.175332-1-likhitha@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index eebeea6bdc76..72c65570db37 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -45,7 +45,10 @@ trace_libc_inet_pton_backtrace() { ;; ppc64|ppc64le) eventattr='max-stack=4' - echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + # Add gaih_inet to expected backtrace only if it is part of libc. + if nm $libc | grep -F -q gaih_inet.; then + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + fi echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; -- cgit v1.2.3 From fc6543bb55d4077c44e577c321bbf158446c8000 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 28 Oct 2023 20:34:53 +0100 Subject: KVM: selftests: add -MP to CFLAGS Using -MD without -MP causes build failures when a header file is deleted or moved. With -MP, the compiler will emit phony targets for the header files it lists as dependencies, and the Makefiles won't refuse to attempt to rebuild a C unit which no longer includes the deleted header. Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/9fc8b5395321abbfcaf5d78477a9a7cd350b08e4.camel@infradead.org Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 69ce8e06b3a3..914987220a62 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -226,7 +226,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD\ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -- cgit v1.2.3 From 6542a00369284c951185be8fa2ed45cf423cce06 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Nov 2023 17:09:52 -0800 Subject: KVM: selftests: Drop the single-underscore ioctl() helpers Drop _kvm_ioctl(), _vm_ioctl(), and _vcpu_ioctl(), as they are no longer used by anything other than the no-underscores variants (and may have never been used directly). The single-underscore variants were never intended to be a "feature", they were a stopgap of sorts to ease the conversion to pretty printing ioctl() names when reporting errors. Opportunistically add a comment explaining when to use __KVM_IOCTL_ERROR() versus KVM_IOCTL_ERROR(). The single-underscore macros were subtly ensuring that the name of the ioctl() was printed on error, i.e. it's all too easy to overlook the fact that using __KVM_IOCTL_ERROR() is intentional. Link: https://lore.kernel.org/r/20231108010953.560824-2-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util_base.h | 30 ++++++++++------------ 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 1b58f943562f..190f9c0ef584 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -267,6 +267,13 @@ static inline bool kvm_has_cap(long cap) #define __KVM_SYSCALL_ERROR(_name, _ret) \ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) +/* + * Use the "inner", double-underscore macro when reporting errors from within + * other macros so that the name of ioctl() and not its literal numeric value + * is printed on error. The "outer" macro is strongly preferred when reporting + * errors "directly", i.e. without an additional layer of macros, as it reduces + * the probability of passing in the wrong string. + */ #define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) #define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) @@ -279,17 +286,13 @@ static inline bool kvm_has_cap(long cap) #define __kvm_ioctl(kvm_fd, cmd, arg) \ kvm_do_ioctl(kvm_fd, cmd, arg) - -#define _kvm_ioctl(kvm_fd, cmd, name, arg) \ +#define kvm_ioctl(kvm_fd, cmd, arg) \ ({ \ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ }) -#define kvm_ioctl(kvm_fd, cmd, arg) \ - _kvm_ioctl(kvm_fd, cmd, #cmd, arg) - static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } #define __vm_ioctl(vm, cmd, arg) \ @@ -298,17 +301,13 @@ static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } kvm_do_ioctl((vm)->fd, cmd, arg); \ }) -#define _vm_ioctl(vm, cmd, name, arg) \ +#define vm_ioctl(vm, cmd, arg) \ ({ \ int ret = __vm_ioctl(vm, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ }) -#define vm_ioctl(vm, cmd, arg) \ - _vm_ioctl(vm, cmd, #cmd, arg) - - static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } #define __vcpu_ioctl(vcpu, cmd, arg) \ @@ -317,16 +316,13 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } kvm_do_ioctl((vcpu)->fd, cmd, arg); \ }) -#define _vcpu_ioctl(vcpu, cmd, name, arg) \ +#define vcpu_ioctl(vcpu, cmd, arg) \ ({ \ int ret = __vcpu_ioctl(vcpu, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ }) -#define vcpu_ioctl(vcpu, cmd, arg) \ - _vcpu_ioctl(vcpu, cmd, #cmd, arg) - /* * Looks up and returns the value corresponding to the capability * (KVM_CAP_*) given by cap. -- cgit v1.2.3 From 1b78d474ce4ecbf15a4a8b08994b8ed8ceec0dab Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 7 Nov 2023 17:09:53 -0800 Subject: KVM: selftests: Add logic to detect if ioctl() failed because VM was killed Add yet another macro to the VM/vCPU ioctl() framework to detect when an ioctl() failed because KVM killed/bugged the VM, i.e. when there was nothing wrong with the ioctl() itself. If KVM kills a VM, e.g. by way of a failed KVM_BUG_ON(), all subsequent VM and vCPU ioctl()s will fail with -EIO, which can be quite misleading and ultimately waste user/developer time. Use KVM_CHECK_EXTENSION on KVM_CAP_USER_MEMORY to detect if the VM is dead and/or bug, as KVM doesn't provide a dedicated ioctl(). Using a heuristic is obviously less than ideal, but practically speaking the logic is bulletproof barring a KVM change, and any such change would arguably break userspace, e.g. if KVM returns something other than -EIO. Without the detection, tearing down a bugged VM yields a cryptic failure when deleting memslots: ==== Test Assertion Failure ==== lib/kvm_util.c:689: !ret pid=45131 tid=45131 errno=5 - Input/output error 1 0x00000000004036c3: __vm_mem_region_delete at kvm_util.c:689 2 0x00000000004042f0: kvm_vm_free at kvm_util.c:724 (discriminator 12) 3 0x0000000000402929: race_sync_regs at sync_regs_test.c:193 4 0x0000000000401cab: main at sync_regs_test.c:334 (discriminator 6) 5 0x0000000000416f13: __libc_start_call_main at libc-start.o:? 6 0x000000000041855f: __libc_start_main_impl at ??:? 7 0x0000000000401d40: _start at ??:? KVM_SET_USER_MEMORY_REGION failed, rc: -1 errno: 5 (Input/output error) Which morphs into a more pointed error message with the detection: ==== Test Assertion Failure ==== lib/kvm_util.c:689: false pid=80347 tid=80347 errno=5 - Input/output error 1 0x00000000004039ab: __vm_mem_region_delete at kvm_util.c:689 (discriminator 5) 2 0x0000000000404660: kvm_vm_free at kvm_util.c:724 (discriminator 12) 3 0x0000000000402ac9: race_sync_regs at sync_regs_test.c:193 4 0x0000000000401cb7: main at sync_regs_test.c:334 (discriminator 6) 5 0x0000000000418263: __libc_start_call_main at libc-start.o:? 6 0x00000000004198af: __libc_start_main_impl at ??:? 7 0x0000000000401d90: _start at ??:? KVM killed/bugged the VM, check the kernel log for clues Suggested-by: Michal Luczaj Cc: Oliver Upton Cc: Colton Lewis Link: https://lore.kernel.org/r/20231108010953.560824-3-seanjc@google.com Signed-off-by: Sean Christopherson --- .../testing/selftests/kvm/include/kvm_util_base.h | 39 +++++++++++++++++++--- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- 2 files changed, 35 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 190f9c0ef584..e0da036a13ae 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -301,11 +301,40 @@ static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } kvm_do_ioctl((vm)->fd, cmd, arg); \ }) +/* + * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if + * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, + * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before + * selftests existed and (b) should never outright fail, i.e. is supposed to + * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the + * VM and its vCPUs, including KVM_CHECK_EXTENSION. + */ +#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ +do { \ + int __errno = errno; \ + \ + static_assert_is_vm(vm); \ + \ + if (cond) \ + break; \ + \ + if (errno == EIO && \ + __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ + TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ + TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ + } \ + errno = __errno; \ + TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ +} while (0) + +#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ + __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) + #define vm_ioctl(vm, cmd, arg) \ ({ \ int ret = __vm_ioctl(vm, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ }) static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } @@ -320,7 +349,7 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } ({ \ int ret = __vcpu_ioctl(vcpu, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ }) /* @@ -331,7 +360,7 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap) { int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); - TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); + TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); return ret; } @@ -438,7 +467,7 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); return fd; } @@ -680,7 +709,7 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) { int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); return fd; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9b29cbf49476..17a978b8a2c4 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1271,7 +1271,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu->vm = vm; vcpu->id = vcpu_id; vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); - TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); + TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", -- cgit v1.2.3 From e29f5d0c3c7c055b36ef55f9e92100450b2506a3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 15 Aug 2023 15:00:30 -0700 Subject: KVM: selftests: Remove x86's so called "MMIO warning" test Remove x86's mmio_warning_test, as it is unnecessarily complex (there's no reason to fork, spawn threads, initialize srand(), etc..), unnecessarily restrictive (triggering triple fault is not unique to Intel CPUs without unrestricted guest), and provides no meaningful coverage beyond what basic fuzzing can achieve (running a vCPU with garbage is fuzzing's bread and butter). That the test has *all* of the above flaws is not coincidental, as the code was copy+pasted almost verbatim from the syzkaller reproducer that originally found the KVM bug (which has long since been fixed). Cc: Michal Luczaj Link: https://groups.google.com/g/syzkaller/c/lHfau8E3SOE Link: https://lore.kernel.org/r/20230815220030.560372-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 1 - .../selftests/kvm/x86_64/mmio_warning_test.c | 121 --------------------- 2 files changed, 122 deletions(-) delete mode 100644 tools/testing/selftests/kvm/x86_64/mmio_warning_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 914987220a62..4412b42d95de 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -86,7 +86,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c deleted file mode 100644 index ce1ccc4c1503..000000000000 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * mmio_warning_test - * - * Copyright (C) 2019, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Test that we don't get a kernel warning when we call KVM_RUN after a - * triple fault occurs. To get the triple fault to occur we call KVM_RUN - * on a VCPU that hasn't been properly setup. - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NTHREAD 4 -#define NPROCESS 5 - -struct thread_context { - int kvmcpu; - struct kvm_run *run; -}; - -void *thr(void *arg) -{ - struct thread_context *tc = (struct thread_context *)arg; - int res; - int kvmcpu = tc->kvmcpu; - struct kvm_run *run = tc->run; - - res = ioctl(kvmcpu, KVM_RUN, 0); - pr_info("ret1=%d exit_reason=%d suberror=%d\n", - res, run->exit_reason, run->internal.suberror); - - return 0; -} - -void test(void) -{ - int i, kvm, kvmvm, kvmcpu; - pthread_t th[NTHREAD]; - struct kvm_run *run; - struct thread_context tc; - - kvm = open("/dev/kvm", O_RDWR); - TEST_ASSERT(kvm != -1, "failed to open /dev/kvm"); - kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL); - TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm)); - kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0); - TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu)); - run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, - kvmcpu, 0); - tc.kvmcpu = kvmcpu; - tc.run = run; - srand(getpid()); - for (i = 0; i < NTHREAD; i++) { - pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc); - usleep(rand() % 10000); - } - for (i = 0; i < NTHREAD; i++) - pthread_join(th[i], NULL); -} - -int get_warnings_count(void) -{ - int warnings; - FILE *f; - - f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - if (fscanf(f, "%d", &warnings) < 1) - warnings = 0; - pclose(f); - - return warnings; -} - -int main(void) -{ - int warnings_before, warnings_after; - - TEST_REQUIRE(host_cpu_is_intel); - - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); - - warnings_before = get_warnings_count(); - - for (int i = 0; i < NPROCESS; ++i) { - int status; - int pid = fork(); - - if (pid < 0) - exit(1); - if (pid == 0) { - test(); - exit(0); - } - while (waitpid(pid, &status, __WALL) != pid) - ; - } - - warnings_after = get_warnings_count(); - TEST_ASSERT(warnings_before == warnings_after, - "Warnings found in kernel. Run 'dmesg' to inspect them."); - - return 0; -} -- cgit v1.2.3 From 341ac980eab90ac1f6c22ee9f9da83ed9604d899 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:07 -0800 Subject: xsk: Support tx_metadata_len For zerocopy mode, tx_desc->addr can point to an arbitrary offset and carry some TX metadata in the headroom. For copy mode, there is no way currently to populate skb metadata. Introduce new tx_metadata_len umem config option that indicates how many bytes to treat as metadata. Metadata bytes come prior to tx_desc address (same as in RX case). The size of the metadata has mostly the same constraints as XDP: - less than 256 bytes - 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte timestamp in the completion) - non-zero This data is not interpreted in any way right now. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 1 + include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 1 + net/xdp/xdp_umem.c | 4 ++++ net/xdp/xsk.c | 12 +++++++++++- net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 17 ++++++++++------- tools/include/uapi/linux/if_xdp.h | 1 + 8 files changed, 30 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index f83128007fb0..bcf765124f72 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -30,6 +30,7 @@ struct xdp_umem { struct user_struct *user; refcount_t users; u8 flags; + u8 tx_metadata_len; bool zc; struct page **pgs; int id; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index b0bdff26fc88..1985ffaf9b0c 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -77,6 +77,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool dma_need_sync; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 8d48863472b9..2ecf79282c26 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..946a687fb8e8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..c904356e2800 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..386eddcdf837 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..c74a1372bcb9 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 73a47da885dc..34411a2e5b6c 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { -- cgit v1.2.3 From 48eb03dd26304c24f03bdbb9382e89c8564e71df Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:08 -0800 Subject: xsk: Add TX timestamp and TX checksum offload support This change actually defines the (initial) metadata layout that should be used by AF_XDP userspace (xsk_tx_metadata). The first field is flags which requests appropriate offloads, followed by the offload-specific fields. The supported per-device offloads are exported via netlink (new xsk-flags). The offloads themselves are still implemented in a bit of a framework-y fashion that's left from my initial kfunc attempt. I'm introducing new xsk_tx_metadata_ops which drivers are supposed to implement. The drivers are also supposed to call xsk_tx_metadata_request/xsk_tx_metadata_complete in the right places. Since xsk_tx_metadata_{request,_complete} are static inline, we don't incur any extra overhead doing indirect calls. The benefit of this scheme is as follows: - keeps all metadata layout parsing away from driver code - makes it easy to grep and see which drivers implement what - don't need any extra flags to maintain to keep track of what offloads are implemented; if the callback is implemented - the offload is supported (used by netlink reporting code) Two offloads are defined right now: 1. XDP_TXMD_FLAGS_CHECKSUM: skb-style csum_start+csum_offset 2. XDP_TXMD_FLAGS_TIMESTAMP: writes TX timestamp back into metadata area upon completion (tx_timestamp field) XDP_TXMD_FLAGS_TIMESTAMP is also implemented for XDP_COPY mode: it writes SW timestamp from the skb destructor (note I'm reusing hwtstamps to pass metadata pointer). The struct is forward-compatible and can be extended in the future by appending more fields. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 19 +++++- include/linux/netdevice.h | 2 + include/linux/skbuff.h | 14 +++- include/net/xdp_sock.h | 110 ++++++++++++++++++++++++++++++++ include/net/xdp_sock_drv.h | 13 ++++ include/net/xsk_buff_pool.h | 6 ++ include/uapi/linux/if_xdp.h | 38 +++++++++++ include/uapi/linux/netdev.h | 16 +++++ net/core/netdev-genl.c | 13 +++- net/xdp/xsk.c | 34 ++++++++++ net/xdp/xsk_queue.h | 2 +- tools/include/uapi/linux/if_xdp.h | 52 +++++++++++++-- tools/include/uapi/linux/netdev.h | 16 +++++ tools/net/ynl/generated/netdev-user.c | 19 ++++++ tools/net/ynl/generated/netdev-user.h | 3 + 15 files changed, 348 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 14511b13f305..00439bcbd2e3 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -45,7 +45,6 @@ definitions: - type: flags name: xdp-rx-metadata - render-max: true entries: - name: timestamp @@ -55,6 +54,18 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + type: flags + name: xsk-flags + entries: + - + name: tx-timestamp + doc: + HW timestamping egress packets is supported by the driver. + - + name: tx-checksum + doc: + L3 checksum HW offload is supported by the driver. attribute-sets: - @@ -86,6 +97,11 @@ attribute-sets: See Documentation/networking/xdp-rx-metadata.rst for more details. type: u64 enum: xdp-rx-metadata + - + name: xsk-features + doc: Bitmask of enabled AF_XDP features. + type: u64 + enum: xsk-flags operations: list: @@ -103,6 +119,7 @@ operations: - xdp-features - xdp-zc-max-segs - xdp-rx-metadata-features + - xsk-features dump: reply: *dev-all - diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87caa81f70c..08da8b28c816 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1865,6 +1865,7 @@ enum netdev_stat_type { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. + * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -2128,6 +2129,7 @@ struct net_device { unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; int ifindex; unsigned short gflags; unsigned short hard_header_len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27998f73183e..b370eb8d70f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -566,6 +566,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -578,7 +587,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index bcf765124f72..3cb4dc9bd70e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -93,12 +93,105 @@ struct xdp_sock { struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */ }; +/* + * AF_XDP TX metadata hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * void (*tmo_request_timestamp)(void *priv) + * Called when AF_XDP frame requested egress timestamp. + * + * u64 (*tmo_fill_timestamp)(void *priv) + * Called when AF_XDP frame, that had requested egress timestamp, + * received a completion. The hook needs to return the actual HW timestamp. + * + * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv) + * Called when AF_XDP frame requested HW checksum offload. csum_start + * indicates position where checksumming should start. + * csum_offset indicates position where checksum should be stored. + * + */ +struct xsk_tx_metadata_ops { + void (*tmo_request_timestamp)(void *priv); + u64 (*tmo_fill_timestamp)(void *priv); + void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv); +}; + #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); +/** + * xsk_tx_metadata_to_compl - Save enough relevant metadata information + * to perform tx completion in the future. + * @meta: pointer to AF_XDP metadata area + * @compl: pointer to output struct xsk_tx_metadata_to_compl + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. The value of @compl should be stored + * and passed to xsk_tx_metadata_complete upon TX completion. + */ +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ + if (!meta) + return; + + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + compl->tx_timestamp = &meta->completion.tx_timestamp; + else + compl->tx_timestamp = NULL; +} + +/** + * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission + * and call appropriate xsk_tx_metadata_ops operation. + * @meta: pointer to AF_XDP metadata area + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. + */ +static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!meta) + return; + + if (ops->tmo_request_timestamp) + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + ops->tmo_request_timestamp(priv); + + if (ops->tmo_request_checksum) + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) + ops->tmo_request_checksum(meta->request.csum_start, + meta->request.csum_offset, priv); +} + +/** + * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion + * and call appropriate xsk_tx_metadata_ops operation. + * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device upon + * AF_XDP egress completion. + */ +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!compl) + return; + + *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); +} + #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -115,6 +208,23 @@ static inline void __xsk_map_flush(void) { } +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ +} + +static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ #if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 1f6fc8c7a84c..e2558ac3e195 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -165,6 +165,14 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + if (!pool->tx_metadata_len) + return NULL; + + return xp_raw_get_data(pool, addr) - pool->tx_metadata_len; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -324,6 +332,11 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return NULL; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1985ffaf9b0c..97f5cc10d79e 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -33,6 +33,7 @@ struct xdp_buff_xsk { }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) +#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) struct xsk_dma_map { dma_addr_t *dma_pages; @@ -234,4 +235,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } +static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) +{ + return pool->tx_metadata_len > 0; +} + #endif /* XSK_BUFF_POOL_H_ */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 2ecf79282c26..95de66d5a26c 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of struct + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -122,4 +157,7 @@ struct xdp_desc { */ #define XDP_PKT_CONTD (1 << 0) +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) + #endif /* _LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fe61f85bcf33..10f2124e9e23 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "netdev-genl-gen.h" @@ -13,6 +14,7 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; @@ -26,11 +28,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC + if (netdev->xsk_tx_metadata_ops) { + if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) + xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; + if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) + xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + } + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, - xdp_rx_meta, NETDEV_A_DEV_PAD)) { + xdp_rx_meta, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, + xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c904356e2800..e83ade32f1fd 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb) { + struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; + + if (compl->tx_timestamp) { + /* sw completion timestamp, not a real one */ + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { + struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; + bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { @@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, kfree_skb(skb); goto free_err; } + + first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -709,12 +720,35 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); } + + if (first_frag && desc->options & XDP_TX_METADATA) { + if (unlikely(xs->pool->tx_metadata_len == 0)) { + err = -EINVAL; + goto free_err; + } + + meta = buffer - xs->pool->tx_metadata_len; + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > len)) { + err = -EINVAL; + goto free_err; + } + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + } + } } skb->dev = dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); xsk_set_destructor_arg(skb); return skb; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index c74a1372bcb9..6f2d1621c992 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -137,7 +137,7 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_unused_options_set(u32 options) { - return options & ~XDP_PKT_CONTD; + return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 34411a2e5b6c..d0882edc1642 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -26,11 +26,11 @@ */ #define XDP_USE_NEED_WAKEUP (1 << 3) /* By setting this option, userspace application indicates that it can - * handle multiple descriptors per packet thus enabling xsk core to split + * handle multiple descriptors per packet thus enabling AF_XDP to split * multi-buffer XDP frames into multiple Rx descriptors. Without this set - * such frames will be dropped by xsk. + * such frames will be dropped. */ -#define XDP_USE_SG (1 << 4) +#define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ #define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of union xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of union + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -113,9 +148,16 @@ struct xdp_desc { __u32 options; }; -/* Flag indicating packet constitutes of multiple buffers*/ +/* UMEM descriptor is __u64 */ + +/* Flag indicating that the packet continues with the buffer pointed out by the + * next frame in the ring. The end of the packet is signalled by setting this + * bit to zero. For single buffer packets, every descriptor has 'options' set + * to 0 and this maintains backward compatibility. + */ #define XDP_PKT_CONTD (1 << 0) -/* UMEM descriptor is __u64 */ +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) #endif /* _LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index b5ffe8cd1144..6283d87dad37 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -58,6 +58,19 @@ const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) return netdev_xdp_rx_metadata_strmap[value]; } +static const char * const netdev_xsk_flags_strmap[] = { + [0] = "tx-timestamp", + [1] = "tx-checksum", +}; + +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) +{ + value = ffs(value) - 1; + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xsk_flags_strmap)) + return NULL; + return netdev_xsk_flags_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, @@ -65,6 +78,7 @@ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, + [NETDEV_A_DEV_XSK_FEATURES] = { .name = "xsk-features", .type = YNL_PT_U64, }, }; struct ynl_policy_nest netdev_dev_nest = { @@ -116,6 +130,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.xdp_rx_metadata_features = 1; dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); + } else if (type == NETDEV_A_DEV_XSK_FEATURES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xsk_features = 1; + dst->xsk_features = mnl_attr_get_u64(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 4fafac879df3..39af1908444b 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -19,6 +19,7 @@ extern const struct ynl_family ynl_netdev_family; const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); /* Common nested types */ /* ============== NETDEV_CMD_DEV_GET ============== */ @@ -50,12 +51,14 @@ struct netdev_dev_get_rsp { __u32 xdp_features:1; __u32 xdp_zc_max_segs:1; __u32 xdp_rx_metadata_features:1; + __u32 xsk_features:1; } _present; __u32 ifindex; __u64 xdp_features; __u32 xdp_zc_max_segs; __u64 xdp_rx_metadata_features; + __u64 xsk_features; }; void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); -- cgit v1.2.3 From 9276009d35d3f65e083b95d30a4f967baaae0fc6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:09 -0800 Subject: tools: ynl: Print xsk-features from the sample In a similar fashion we do for the other bit masks. Fix mask parsing (>= vs >) while we are it. Signed-off-by: Stanislav Fomichev Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-4-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/net/ynl/samples/netdev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index b828225daad0..591b90e21890 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -33,17 +33,23 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) return; printf("xdp-features (%llx):", d->xdp_features); - for (int i = 0; d->xdp_features > 1U << i; i++) { + for (int i = 0; d->xdp_features >= 1U << i; i++) { if (d->xdp_features & (1U << i)) printf(" %s", netdev_xdp_act_str(1 << i)); } printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features); - for (int i = 0; d->xdp_rx_metadata_features > 1U << i; i++) { + for (int i = 0; d->xdp_rx_metadata_features >= 1U << i; i++) { if (d->xdp_rx_metadata_features & (1U << i)) printf(" %s", netdev_xdp_rx_metadata_str(1 << i)); } + printf(" xsk-features (%llx):", d->xsk_features); + for (int i = 0; d->xsk_features >= 1U << i; i++) { + if (d->xsk_features & (1U << i)) + printf(" %s", netdev_xsk_flags_str(1 << i)); + } + printf(" xdp-zc-max-segs=%u", d->xdp_zc_max_segs); name = netdev_op_str(op); -- cgit v1.2.3 From 11614723af26e7c32fcb704d8f30fdf60c1122dc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:14 -0800 Subject: xsk: Add option to calculate TX checksum in SW For XDP_COPY mode, add a UMEM option XDP_UMEM_TX_SW_CSUM to call skb_checksum_help in transmit path. Might be useful to debugging issues with real hardware. I also use this mode in the selftests. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-9-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/networking/xsk-tx-metadata.rst | 9 +++++++++ include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 8 +++++++- net/xdp/xdp_umem.c | 7 ++++++- net/xdp/xsk.c | 6 ++++++ net/xdp/xsk_buff_pool.c | 1 + tools/include/uapi/linux/if_xdp.h | 8 +++++++- 7 files changed, 37 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst index 4f376560b23f..97ecfa480d00 100644 --- a/Documentation/networking/xsk-tx-metadata.rst +++ b/Documentation/networking/xsk-tx-metadata.rst @@ -50,6 +50,15 @@ packet's ``struct xdp_desc`` descriptor should set ``XDP_TX_METADATA`` bit in the ``options`` field. Also note that in a multi-buffer packet only the first chunk should carry the metadata. +Software TX Checksum +==================== + +For development and testing purposes its possible to pass +``XDP_UMEM_TX_SW_CSUM`` flag to ``XDP_UMEM_REG`` UMEM registration call. +In this case, when running in ``XDK_COPY`` mode, the TX checksum +is calculated on the CPU. Do not enable this option in production because +it will negatively affect performance. + Querying Device Capabilities ============================ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 97f5cc10d79e..8d48d37ab7c0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -83,6 +83,7 @@ struct xsk_buff_pool { bool uses_need_wakeup; bool dma_need_sync; bool unaligned; + bool tx_sw_csum; void *addrs; /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 95de66d5a26c..d31698410410 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 946a687fb8e8..caa340134b0e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) return 0; } +#define XDP_UMEM_FLAGS_VALID ( \ + XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ + XDP_UMEM_TX_SW_CSUM | \ + 0) + static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) + if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d66ba9d6154f..281d49b4fca4 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -744,6 +744,12 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->csum_start = hr + meta->request.csum_start; skb->csum_offset = meta->request.csum_offset; skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(xs->pool->tx_sw_csum)) { + err = skb_checksum_help(skb); + if (err) + goto free_err; + } } } } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 386eddcdf837..4f6f538a5462 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->umem = umem; pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; + pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index d0882edc1642..638c606dfa74 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; -- cgit v1.2.3 From df3ed0003ec4994ce8ed4818c435c481281df89e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:15 -0800 Subject: selftests/xsk: Support tx_metadata_len Add new config field and propagate to UMEM registration setsockopt. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-10-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xsk.c | 3 +++ tools/testing/selftests/bpf/xsk.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index e574711eeb84..25d568abf0f2 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -115,6 +115,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + cfg->tx_metadata_len = 0; return; } @@ -123,6 +124,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = usr_cfg->frame_size; cfg->frame_headroom = usr_cfg->frame_headroom; cfg->flags = usr_cfg->flags; + cfg->tx_metadata_len = usr_cfg->tx_metadata_len; } static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, @@ -252,6 +254,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, mr.chunk_size = umem->config.frame_size; mr.headroom = umem->config.frame_headroom; mr.flags = umem->config.flags; + mr.tx_metadata_len = umem->config.tx_metadata_len; err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); if (err) { diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 771570bc3731..93c2cc413cfc 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -200,6 +200,7 @@ struct xsk_umem_config { __u32 frame_size; __u32 frame_headroom; __u32 flags; + __u32 tx_metadata_len; }; int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); -- cgit v1.2.3 From f6642de0c3e94d3ef6f44e127d11fcf4138873f7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:16 -0800 Subject: selftests/bpf: Add csum helpers Checksum helpers will be used to calculate pseudo-header checksum in AF_XDP metadata selftests. The helpers are mirroring existing kernel ones: - csum_tcpudp_magic : IPv4 pseudo header csum - csum_ipv6_magic : IPv6 pseudo header csum - csum_fold : fold csum and do one's complement Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-11-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.h | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 34f1200a781b..94b9be24e39b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -71,4 +71,47 @@ struct nstoken; */ struct nstoken *open_netns(const char *name); void close_netns(struct nstoken *token); + +static __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + + return (__u16)~csum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + int i; + + for (i = 0; i < 4; i++) + s += (__u32)saddr->s6_addr32[i]; + for (i = 0; i < 4; i++) + s += (__u32)daddr->s6_addr32[i]; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + #endif -- cgit v1.2.3 From 40808a237d9c8fcaa3eaeefe2ac59e4733907478 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:17 -0800 Subject: selftests/bpf: Add TX side to xdp_metadata Request TX timestamp and make sure it's not empty. Request TX checksum offload (SW-only) and make sure it's resolved to the correct one. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-12-sdf@google.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/xdp_metadata.c | 33 +++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 4439ba9392f8..33cdf88efa6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -56,7 +56,8 @@ static int open_xsk(int ifindex, struct xsk *xsk) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; u64 addr; @@ -138,6 +139,7 @@ static void ip_csum(struct iphdr *iph) static int generate_packet(struct xsk *xsk, __u16 dst_port) { + struct xsk_tx_metadata *meta; struct xdp_desc *tx_desc; struct udphdr *udph; struct ethhdr *eth; @@ -151,10 +153,14 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return -1; tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); - tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + eth = data; iph = (void *)(eth + 1); udph = (void *)(iph + 1); @@ -178,11 +184,17 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) udph->source = htons(AF_XDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); - udph->check = 0; + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + meta->request.csum_offset = offsetof(struct udphdr, check); + tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; + tx_desc->options |= XDP_TX_METADATA; xsk_ring_prod__submit(&xsk->tx, 1); ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); @@ -194,13 +206,21 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) static void complete_tx(struct xsk *xsk) { - __u32 idx; + struct xsk_tx_metadata *meta; __u64 addr; + void *data; + __u32 idx; if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp"); + xsk_ring_cons__release(&xsk->comp, 1); } } @@ -221,6 +241,7 @@ static int verify_xsk_metadata(struct xsk *xsk) const struct xdp_desc *rx_desc; struct pollfd fds = {}; struct xdp_meta *meta; + struct udphdr *udph; struct ethhdr *eth; struct iphdr *iph; __u64 comp_addr; @@ -257,6 +278,7 @@ static int verify_xsk_metadata(struct xsk *xsk) ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); iph = (void *)(eth + 1); ASSERT_EQ((int)iph->version, 4, "iph->version"); + udph = (void *)(iph + 1); /* custom metadata */ @@ -270,6 +292,9 @@ static int verify_xsk_metadata(struct xsk *xsk) ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); + /* checksum offload */ + ASSERT_EQ(udph->check, htons(0x721c), "csum"); + xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); -- cgit v1.2.3 From 12b4b7963d3cca253db3c31aa7611b988699e30f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:18 -0800 Subject: selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP This is the recommended way to run AF_XDP, so let's use it in the test. Also, some unrelated changes to now blow up the log too much: - change default mode to zerocopy and add -c to use copy mode - small fixes for the flags/sizes/prints - add print_tstamp_delta to print timestamp + reference Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-13-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 73 ++++++++++++++++++--------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index c3ba40d0b9de..4484b17c7a56 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -32,7 +32,7 @@ #include "xdp_metadata.h" -#define UMEM_NUM 16 +#define UMEM_NUM 256 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) @@ -48,7 +48,7 @@ struct xsk { }; struct xdp_hw_metadata *bpf_obj; -__u16 bind_flags = XDP_COPY; +__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; struct xsk *rx_xsk; const char *ifname; int ifindex; @@ -68,7 +68,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XSK_UMEM__DEFAULT_FLAGS, }; __u32 idx; u64 addr; @@ -110,7 +110,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) for (i = 0; i < UMEM_NUM / 2; i++) { addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); - *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; + *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr; } xsk_ring_prod__submit(&xsk->fill, ret); @@ -131,12 +131,22 @@ static void refill_rx(struct xsk *xsk, __u64 addr) __u32 idx; if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr); *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; xsk_ring_prod__submit(&xsk->fill, 1); } } +static int kick_tx(struct xsk *xsk) +{ + return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); +} + +static int kick_rx(struct xsk *xsk) +{ + return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); +} + #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ static __u64 gettime(clockid_t clock_id) { @@ -152,6 +162,17 @@ static __u64 gettime(clockid_t clock_id) return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } +static void print_tstamp_delta(const char *name, const char *refname, + __u64 tstamp, __u64 reference) +{ + __s64 delta = (__s64)reference - (__s64)tstamp; + + printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n", + name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname, + (double)delta / NANOSEC_PER_SEC, + (double)delta / 1000); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; @@ -167,22 +188,13 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, (double)meta->rx_timestamp / NANOSEC_PER_SEC); if (meta->rx_timestamp) { - __u64 usr_clock = gettime(clock_id); - __u64 xdp_clock = meta->xdp_timestamp; - __s64 delta_X = xdp_clock - meta->rx_timestamp; - __s64 delta_X2U = usr_clock - xdp_clock; - - printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, - (double)delta_X / NANOSEC_PER_SEC, - (double)delta_X / 1000); - - printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - usr_clock, (double)usr_clock / NANOSEC_PER_SEC, - (double)delta_X2U / NANOSEC_PER_SEC, - (double)delta_X2U / 1000); - } + __u64 ref_tstamp = gettime(clock_id); + print_tstamp_delta("HW RX-time", "User RX-time", + meta->rx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User RX-time", + meta->xdp_timestamp, ref_tstamp); + } } static void verify_skb_metadata(int fd) @@ -252,6 +264,13 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t while (true) { errno = 0; + + for (i = 0; i < rxq; i++) { + ret = kick_rx(&rx_xsk[i]); + if (ret) + printf("kick_rx ret=%d\n", ret); + } + ret = poll(fds, rxq + 1, 1000); printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", ret, errno, bpf_obj->bss->pkts_skip, @@ -420,8 +439,9 @@ static void print_usage(void) { const char *usage = "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" - " -m Enable multi-buffer XDP for larger MTU\n" + " -c Run in copy mode (zerocopy is default)\n" " -h Display this help and exit\n\n" + " -m Enable multi-buffer XDP for larger MTU\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 9091\n"; @@ -432,14 +452,19 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "mh")) != -1) { + while ((opt = getopt(argc, argv, "chm")) != -1) { switch (opt) { - case 'm': - bind_flags |= XDP_USE_SG; + case 'c': + bind_flags &= ~XDP_USE_NEED_WAKEUP; + bind_flags &= ~XDP_ZEROCOPY; + bind_flags |= XDP_COPY; break; case 'h': print_usage(); exit(0); + case 'm': + bind_flags |= XDP_USE_SG; + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); -- cgit v1.2.3 From 60523115c1b10c8855492d84c1ba88af452e221c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:19 -0800 Subject: selftests/bpf: Add TX side to xdp_hw_metadata When we get a packet on port 9091, we swap src/dst and send it out. At this point we also request the timestamp and checksum offloads. Checksum offload is verified by looking at the tcpdump on the other side. The tool prints pseudo-header csum and the final one it expects. The final checksum actually matches the incoming packets checksum because we only flip the src/dst and don't change the payload. Some other related changes: - switched to zerocopy mode by default; new flag can be used to force old behavior - request fixed tx_metadata_len headroom - some other small fixes (umem size, fill idx+i, etc) mvbz3:~# ./xdp_hw_metadata eth3 ... xsk_ring_cons__peek: 1 0x19546f8: rx_desc[0]->addr=80100 addr=80100 comp_addr=80100 rx_hash: 0x80B7EA8B with RSS type:0x2A rx_timestamp: 1697580171852147395 (sec:1697580171.8521) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to User RX-time sec:0.2797 (279673.082 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User RX-time sec:0.0001 (121.430 usec) 0x19546f8: ping-pong with csum=3b8e (want d862) csum_start=54 csum_offset=6 0x19546f8: complete tx idx=0 addr=8 tx_timestamp: 1697580172056756493 (sec:1697580172.0568) HW TX-complete-time: 1697580172056756493 (sec:1697580172.0568), delta to User TX-complete-time sec:0.0852 (85175.537 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User TX-complete-time sec:0.0102 (10232.983 usec) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to HW TX-complete-time sec:0.2046 (204609.098 usec) 0x19546f8: complete rx idx=128 addr=80100 mvbz4:~# nc -Nu -q1 ${MVBZ3_LINK_LOCAL_IP}%eth3 9091 mvbz4:~# tcpdump -vvx -i eth3 udp tcpdump: listening on eth3, link-type EN10MB (Ethernet), snapshot length 262144 bytes 12:26:09.301074 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1087.55807 > fe80::1270:fdff:fe48:1077.9091: [bad udp cksum 0x3b8e -> 0xde7e!] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1087 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1077 d9ff 2383 000b 3b8e 0x0030: 7864 70 12:26:09.301976 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1077.9091 > fe80::1270:fdff:fe48:1087.55807: [udp sum ok] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1077 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1087 2383 d9ff 000b de7e 0x0030: 7864 70 Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-14-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 164 +++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 4484b17c7a56..3291625ba4fb 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -10,7 +10,9 @@ * - rx_hash * * TX: - * - TBD + * - UDP 9091 packets trigger TX reply + * - TX HW timestamp is requested and reported back upon completion + * - TX checksum is requested */ #include @@ -24,11 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include "xdp_metadata.h" @@ -53,6 +58,9 @@ struct xsk *rx_xsk; const char *ifname; int ifindex; int rxq; +bool skip_tx; +__u64 last_hw_rx_timestamp; +__u64 last_xdp_rx_timestamp; void test__fail(void) { /* for network_helpers.c */ } @@ -69,6 +77,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, .flags = XSK_UMEM__DEFAULT_FLAGS, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; u64 addr; @@ -185,15 +194,19 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); - printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, - (double)meta->rx_timestamp / NANOSEC_PER_SEC); if (meta->rx_timestamp) { __u64 ref_tstamp = gettime(clock_id); + /* store received timestamps to calculate a delta at tx */ + last_hw_rx_timestamp = meta->rx_timestamp; + last_xdp_rx_timestamp = meta->xdp_timestamp; + print_tstamp_delta("HW RX-time", "User RX-time", meta->rx_timestamp, ref_tstamp); print_tstamp_delta("XDP RX-time", "User RX-time", meta->xdp_timestamp, ref_tstamp); + } else { + printf("No rx_timestamp\n"); } } @@ -242,6 +255,129 @@ static void verify_skb_metadata(int fd) printf("skb hwtstamp is not found!\n"); } +static bool complete_tx(struct xsk *xsk, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + __u64 addr; + void *data; + __u32 idx; + + if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx)) + return false; + + addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + if (meta->completion.tx_timestamp) { + __u64 ref_tstamp = gettime(clock_id); + + print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", + meta->completion.tx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User TX-complete-time", + last_xdp_rx_timestamp, ref_tstamp); + print_tstamp_delta("HW RX-time", "HW TX-complete-time", + last_hw_rx_timestamp, meta->completion.tx_timestamp); + } else { + printf("No tx_timestamp\n"); + } + + xsk_ring_cons__release(&xsk->comp, 1); + + return true; +} + +#define swap(a, b, len) do { \ + for (int i = 0; i < len; i++) { \ + __u8 tmp = ((__u8 *)a)[i]; \ + ((__u8 *)a)[i] = ((__u8 *)b)[i]; \ + ((__u8 *)b)[i] = tmp; \ + } \ +} while (0) + +static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + struct ipv6hdr *ip6h = NULL; + struct iphdr *iph = NULL; + struct xdp_desc *tx_desc; + struct udphdr *udph; + struct ethhdr *eth; + __sum16 want_csum; + void *data; + __u32 idx; + int ret; + int len; + + ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); + if (ret != 1) { + printf("%p: failed to reserve tx slot\n", xsk); + return; + } + + tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); + data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + + eth = rx_packet; + + if (eth->h_proto == htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + udph = (void *)(iph + 1); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + udph = (void *)(ip6h + 1); + } else { + printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto); + xsk_ring_prod__cancel(&xsk->tx, 1); + return; + } + + len = ETH_HLEN; + if (ip6h) + len += sizeof(*ip6h) + ntohs(ip6h->payload_len); + if (iph) + len += ntohs(iph->tot_len); + + swap(eth->h_dest, eth->h_source, ETH_ALEN); + if (iph) + swap(&iph->saddr, &iph->daddr, 4); + else + swap(&ip6h->saddr, &ip6h->daddr, 16); + swap(&udph->source, &udph->dest, 2); + + want_csum = udph->check; + if (ip6h) + udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + else + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + if (iph) + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + else + meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h); + meta->request.csum_offset = offsetof(struct udphdr, check); + + printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n", + xsk, ntohs(udph->check), ntohs(want_csum), + meta->request.csum_start, meta->request.csum_offset); + + memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ + tx_desc->options |= XDP_TX_METADATA; + tx_desc->len = len; + + xsk_ring_prod__submit(&xsk->tx, 1); +} + static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; @@ -307,6 +443,22 @@ peek: verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), clock_id); first_seg = false; + + if (!skip_tx) { + /* mirror first chunk back */ + ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr), + clock_id); + + ret = kick_tx(xsk); + if (ret) + printf("kick_tx ret=%d\n", ret); + + for (int j = 0; j < 500; j++) { + if (complete_tx(xsk, clock_id)) + break; + usleep(10*1000); + } + } } xsk_ring_cons__release(&xsk->rx, 1); @@ -442,6 +594,7 @@ static void print_usage(void) " -c Run in copy mode (zerocopy is default)\n" " -h Display this help and exit\n\n" " -m Enable multi-buffer XDP for larger MTU\n" + " -r Don't generate AF_XDP reply (rx metadata only)\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 9091\n"; @@ -452,7 +605,7 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "chm")) != -1) { + while ((opt = getopt(argc, argv, "chmr")) != -1) { switch (opt) { case 'c': bind_flags &= ~XDP_USE_NEED_WAKEUP; @@ -465,6 +618,9 @@ static void read_args(int argc, char *argv[]) case 'm': bind_flags |= XDP_USE_SG; break; + case 'r': + skip_tx = true; + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); -- cgit v1.2.3 From 51354f700d400e55b329361e1386b04695e6e5c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 28 Nov 2023 17:25:57 -0800 Subject: bpf, sockmap: Add af_unix test with both sockets in map This adds a test where both pairs of a af_unix paired socket are put into a BPF map. This ensures that when we tear down the af_unix pair we don't have any issues on sockmap side with ordering and reference counting. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20231129012557.95371-3-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 51 +++++++++++++++++----- .../selftests/bpf/progs/test_sockmap_listen.c | 7 +++ 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index a934d430c20c..a92807bfcd13 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1337,7 +1337,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int verd_mapfd, enum redir_mode mode) + int sock_mapfd, int nop_mapfd, + int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); unsigned int pass; @@ -1351,6 +1352,12 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, if (err) return; + if (nop_mapfd >= 0) { + err = add_to_sockmap(nop_mapfd, cli0, cli1); + if (err) + return; + } + n = write(cli1, "a", 1); if (n < 0) FAIL_ERRNO("%s: write", log_prefix); @@ -1387,7 +1394,7 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, goto close0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1677,7 +1684,7 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1735,7 +1742,7 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1770,8 +1777,10 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) +static void unix_inet_redir_to_connected(int family, int type, + int sock_mapfd, int nop_mapfd, + int verd_mapfd, + enum redir_mode mode) { int c0, c1, p0, p1; int sfd[2]; @@ -1785,7 +1794,8 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, + sock_mapfd, nop_mapfd, verd_mapfd, mode); xclose(c1); xclose(p1); @@ -1799,6 +1809,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, struct bpf_map *inner_map, int family) { int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int nop_map = bpf_map__fd(skel->maps.nop_map); int verdict_map = bpf_map__fd(skel->maps.verdict_map); int sock_map = bpf_map__fd(inner_map); int err; @@ -1808,14 +1819,32 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, return; skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, REDIR_EGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_EGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, + REDIR_EGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_EGRESS); skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, -1, verdict_map, + REDIR_INGRESS); + + unix_inet_redir_to_connected(family, SOCK_DGRAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); - unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, REDIR_INGRESS); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 464d35bd57c7..b7250eb9c30c 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -14,6 +14,13 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u64); +} nop_map SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); __uint(max_entries, 2); -- cgit v1.2.3 From ee1eb9de81dbdbf078df659062e4df256a009627 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:19 -0800 Subject: tools: ynl: fix build of the page-pool sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of the "destroyed" field in the reply was not changed in the sample after we started calling it "detach_time". page-pool.c: In function ‘main’: page-pool.c:84:33: error: ‘struct ’ has no member named ‘destroyed’ 84 | if (pp->_present.destroyed) | ^ Fixes: 637567e4a3ef ("tools: ynl: add sample for getting page-pool information") Link: https://lore.kernel.org/r/20231129193622.2912353-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/samples/page-pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c index 18d359713469..098b5190d0e5 100644 --- a/tools/net/ynl/samples/page-pool.c +++ b/tools/net/ynl/samples/page-pool.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) struct stat *s = find_ifc(&a, pp->ifindex); count(s, 1, pp); - if (pp->_present.destroyed) + if (pp->_present.detach_time) count(s, 0, pp); } netdev_page_pool_get_list_free(pools); -- cgit v1.2.3 From 929003723f6d1998155bf0472f97d6c75c3db93f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:20 -0800 Subject: tools: ynl: make sure we use local headers for page-pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building samples generates the following warning: In file included from page-pool.c:11: generated/netdev-user.h:21:45: warning: ‘enum netdev_xdp_rx_metadata’ declared inside parameter list will not be visible outside of this definition or declaration 21 | const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); | ^~~~~~~~~~~~~~~~~~~~~~ Our magic way of including uAPI headers assumes the sample name matches the family name. We need to copy the flags over. Fixes: 637567e4a3ef ("tools: ynl: add sample for getting page-pool information") Link: https://lore.kernel.org/r/20231129193622.2912353-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/samples/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 1afefc266b7a..28bdb1557a54 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -18,6 +18,8 @@ include $(wildcard *.d) all: $(BINS) +CFLAGS_page-pool=$(CFLAGS_netdev) + $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) @echo -e '\tCC sample $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o -- cgit v1.2.3 From 9cf9b57082411ad42d6d12c7b857e60add673877 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:21 -0800 Subject: tools: ynl: order building samples after generated code Parallel builds of ynl: make -C tools/net/ynl/ -j 4 don't work correctly right now. samples get handled before generated, so build of samples does not notice that protos.a has changed. Order samples to be last. Link: https://lore.kernel.org/r/20231129193622.2912353-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index d664b36deb5b..da1aa10bbcc3 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -4,6 +4,8 @@ SUBDIRS = lib generated samples all: $(SUBDIRS) +samples: | lib generated + $(SUBDIRS): @if [ -f "$@/Makefile" ] ; then \ $(MAKE) -C $@ ; \ -- cgit v1.2.3 From a115b9279f4897b8afdfbc30035b1382c047fc26 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:22 -0800 Subject: tools: ynl: don't skip regeneration from make targets Commit 2b7ac0c87d98 ("tools: ynl-gen: don't touch the output file if content is the same") is working too well. It was added so that ynl-regen -f doesn't make us rebuild half of the kernel, if there are no actual changes in any generated code. When ynl-gen-c is called by make, however, we're better off trusting make's tracking and overwrite the file. Otherwise if output is identical we won't update file timestamps and make will retry code gen on every invocation. Link: https://lore.kernel.org/r/20231129193622.2912353-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 12 ++++++++---- tools/net/ynl/ynl-regen.sh | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index cbbda276f6d1..ba1c3611c6fa 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1164,8 +1164,9 @@ class RenderInfo: class CodeWriter: - def __init__(self, nlib, out_file=None): + def __init__(self, nlib, out_file=None, overwrite=True): self.nlib = nlib + self._overwrite = overwrite self._nl = False self._block_end = False @@ -1186,8 +1187,9 @@ class CodeWriter: return # Avoid modifying the file if contents didn't change self._out.flush() - if os.path.isfile(self._out_file) and filecmp.cmp(self._out.name, self._out_file, shallow=False): - return + if not self._overwrite and os.path.isfile(self._out_file): + if filecmp.cmp(self._out.name, self._out_file, shallow=False): + return with open(self._out_file, 'w+') as out_file: self._out.seek(0) shutil.copyfileobj(self._out, out_file) @@ -2516,6 +2518,8 @@ def main(): parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') parser.add_argument('--user-header', nargs='+', default=[]) + parser.add_argument('--cmp-out', action='store_true', default=None, + help='Do not overwrite the output file if the new output is identical to the old') parser.add_argument('--exclude-op', action='append', default=[]) parser.add_argument('-o', dest='out_file', type=str, default=None) args = parser.parse_args() @@ -2543,7 +2547,7 @@ def main(): print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file) + cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi' or args.header: diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index bdba24066cf1..a37304dcc88e 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -30,8 +30,8 @@ for f in $files; do fi echo -e "\tGEN ${params[2]}\t$f" - $TOOL --mode ${params[2]} --${params[3]} --spec $KDIR/${params[0]} \ - $args -o $f + $TOOL --cmp-out --mode ${params[2]} --${params[3]} \ + --spec $KDIR/${params[0]} $args -o $f done popd >>/dev/null -- cgit v1.2.3 From 06848c0f341ee3f9226ed01e519c72e4d2b6f001 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:46 -0800 Subject: selftests: mptcp: add evts_get_info helper This patch adds a new helper get_info_value(), using 'sed' command to parse the value of the given item name in the line with the given keyword, to make chk_mptcp_info() and pedit_action_pkts() more readable. Also add another helper evts_get_info() to use get_info_value() to parse the output of 'pm_nl_ctl events' command, to make all the userspace pm selftests more readable, both in mptcp_join.sh and userspace_pm.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-2-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 19 +++-- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 10 +++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 86 ++++++++++------------- 3 files changed, 57 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3c94f2f194d6..d24b0e5e73ef 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1869,10 +1869,8 @@ chk_mptcp_info() print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" - cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" | - sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q') - cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" | - sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") + cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") # 'ss' only display active connections and counters that are not 0. [ -z "$cnt1" ] && cnt1=0 [ -z "$cnt2" ] && cnt2=0 @@ -2848,13 +2846,13 @@ verify_listener_events() return fi - type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type "$evt" "$e_type") + family=$(mptcp_lib_evts_get_info family "$evt" "$e_type") + sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type") if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type") else - saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type") fi if [ $type ] && [ $type = $e_type ] && @@ -3249,8 +3247,7 @@ fastclose_tests() pedit_action_pkts() { tc -n $ns2 -j -s action show action pedit index 100 | \ - grep "packets" | \ - sed 's/.*"packets":\([0-9]\+\),.*/\1/' + mptcp_lib_get_info_value \"packets\" packets } fail_tests() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 92a5befe8039..56cbd57abbae 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -207,3 +207,13 @@ mptcp_lib_result_print_all_tap() { printf "%s\n" "${subtest}" done } + +# get the value of keyword $1 in the line marked by keyword $2 +mptcp_lib_get_info_value() { + grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' +} + +# $1: info name ; $2: evts_ns ; $3: event type +mptcp_lib_evts_get_info() { + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b25a3e33eb25..2413059a42e5 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -247,14 +247,11 @@ make_connection() local server_token local server_serverside - client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$client_evts") - server_token=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - server_serverside=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') + client_token=$(mptcp_lib_evts_get_info token "$client_evts") + client_port=$(mptcp_lib_evts_get_info sport "$client_evts") + client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + server_token=$(mptcp_lib_evts_get_info token "$server_evts") + server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && @@ -340,16 +337,16 @@ verify_announce_event() local dport local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) if [ "$e_af" = "v6" ] then - addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "addr" "dport" "id" } @@ -367,7 +364,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token" if [ "$type" = "" ] then @@ -446,9 +443,9 @@ verify_remove_event() local token local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "id" } @@ -466,7 +463,7 @@ test_remove() $client_addr_id > /dev/null 2>&1 print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -479,7 +476,7 @@ test_remove() ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -583,19 +580,19 @@ verify_subflow_events() fi fi - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + family=$(mptcp_lib_evts_get_info family "$evt" $e_type) + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type) + remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) if [ "$family" = "$AF_INET6" ] then - saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" @@ -630,7 +627,7 @@ test_subflows() kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -668,7 +665,7 @@ test_subflows() # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from server to client machine :>"$server_evts" @@ -707,7 +704,7 @@ test_subflows() # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -745,7 +742,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -784,7 +781,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from client to server machine :>"$client_evts" @@ -821,7 +818,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -867,7 +864,7 @@ test_subflows_v4_v6_mix() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -933,18 +930,13 @@ verify_listener_events() print_test "CLOSE_LISTENER $e_saddr:$e_sport" fi - type=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type $evt $e_type) + family=$(mptcp_lib_evts_get_info family $evt $e_type) + sport=$(mptcp_lib_evts_get_info sport $evt $e_type) if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type) else - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type) fi check_expected "type" "family" "saddr" "sport" -- cgit v1.2.3 From 80775412882e273b8ef62124fae861cde8e6fb3d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:47 -0800 Subject: selftests: mptcp: add chk_subflows_total helper This patch adds a new helper chk_subflows_total(), in it use the newly added counter mptcpi_subflows_total to get the "correct" amount of subflows, including the initial one. To be compatible with old 'ss' or kernel versions not supporting this counter, get the total subflows by listing TCP connections that are MPTCP subflows: ss -ti state state established state syn-sent state syn-recv | grep -c tcp-ulp-mptcp. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-3-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 42 ++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d24b0e5e73ef..566fa0f6506f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1867,7 +1867,7 @@ chk_mptcp_info() local cnt2 local dump_stats - print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" + print_check "mptcp_info ${info1:0:15}=$exp1:$exp2" cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") @@ -1888,6 +1888,42 @@ chk_mptcp_info() fi } +# $1: subflows in ns1 ; $2: subflows in ns2 +# number of all subflows, including the initial subflow. +chk_subflows_total() +{ + local cnt1 + local cnt2 + local info="subflows_total" + local dump_stats + + # if subflows_total counter is supported, use it: + if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then + chk_mptcp_info $info $1 $info $2 + return + fi + + print_check "$info $1:$2" + + # if not, count the TCP connections that are in fact MPTCP subflows + cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + + if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then + fail_test "got subflows $cnt1:$cnt2 expected $1:$2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -ti + ss -N $ns2 -ti + fi +} + chk_link_usage() { local ns=$1 @@ -3431,10 +3467,12 @@ userspace_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi @@ -3451,9 +3489,11 @@ userspace_tests() userspace_pm_add_sf 10.0.3.2 20 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi -- cgit v1.2.3 From 757c828ce94905a2975873d5e90a376c701b2b90 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:48 -0800 Subject: selftests: mptcp: update userspace pm test helpers This patch adds a new argument namespace to userspace_pm_add_addr() and userspace_pm_add_sf() to make these two helper more versatile. Add two more versatile helpers for userspace pm remove subflow or address: userspace_pm_rm_addr() and userspace_pm_rm_sf(). The original test helpers userspace_pm_rm_sf_addr_ns1() and userspace_pm_rm_sf_addr_ns2() can be replaced by these new helpers. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-4-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 102 ++++++++++++------------ 1 file changed, 50 insertions(+), 52 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 566fa0f6506f..6d84c7f5296a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2848,6 +2848,7 @@ backup_tests() fi } +SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED @@ -3308,75 +3309,70 @@ fail_tests() fi } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_addr() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 sleep 1 } -userspace_pm_rm_sf_addr_ns1() +# $1: ns ; $2: id +userspace_pm_rm_addr() { - local addr=$1 - local id=$2 - local tk sp da dp - local cnt_addr cnt_sf - - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - sp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - da=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') - dp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') - cnt_addr=$(rm_addr_count ${ns1}) - cnt_sf=$(rm_sf_count ${ns1}) - ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \ - lport $sp rip $da rport $dp token $tk - wait_rm_addr $ns1 "${cnt_addr}" - wait_rm_sf $ns1 "${cnt_sf}" + local evts=$evts_ns1 + local tk + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + cnt=$(rm_addr_count ${1}) + ip netns exec $1 ./pm_nl_ctl rem token $tk id $2 + wait_rm_addr $1 "${cnt}" } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk da dp - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \ + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info daddr4 "$evts") + dp=$(mptcp_lib_evts_get_info dport "$evts") + + ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ rip $da rport $dp token $tk sleep 1 } -userspace_pm_rm_sf_addr_ns2() +# $1: ns ; $2: addr $3: event type +userspace_pm_rm_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 + local t=${3:-1} + local ip=4 local tk da dp sp - local cnt_addr cnt_sf - - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - sp=$(grep "type:10" "$evts_ns2" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - cnt_addr=$(rm_addr_count ${ns2}) - cnt_sf=$(rm_sf_count ${ns2}) - ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \ + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + if is_v6 $2; then ip=6; fi + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t) + + cnt=$(rm_sf_count ${1}) + ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ rip $da rport $dp token $tk - wait_rm_addr $ns2 "${cnt_addr}" - wait_rm_sf $ns2 "${cnt_sf}" + wait_rm_sf $1 "${cnt}" } userspace_tests() @@ -3463,13 +3459,14 @@ userspace_tests() run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns1 - userspace_pm_add_addr 10.0.2.1 10 + userspace_pm_add_addr $ns1 10.0.2.1 10 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 - userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 + userspace_pm_rm_addr $ns1 10 + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3486,11 +3483,12 @@ userspace_tests() run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns2 - userspace_pm_add_sf 10.0.3.2 20 + userspace_pm_add_sf $ns2 10.0.3.2 20 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 - userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 + userspace_pm_rm_addr $ns2 20 + userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 -- cgit v1.2.3 From b2e2248f365a7ef0687fe048c335fe1a32f98b36 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:49 -0800 Subject: selftests: mptcp: userspace pm create id 0 subflow This patch adds a selftest to create id 0 subflow. Pass id 0 to the helper userspace_pm_add_sf() to create id 0 subflow. chk_mptcp_info shows one subflow but chk_subflows_total shows two subflows in each namespace. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-5-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 6d84c7f5296a..4357a46ca3f3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3495,6 +3495,25 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3 From e3b47e460b4bd0c61d0082f1ac02650dcb79e5d1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:51 -0800 Subject: selftests: mptcp: userspace pm remove initial subflow This patch adds a selftest for userspace PM to remove the initial subflow. Use userspace_pm_add_sf() to add a subflow, and pass initial IP address to userspace_pm_rm_sf() to remove the initial subflow. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-7-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4357a46ca3f3..812a5ee54158 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3514,6 +3514,30 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3 From b9fb176081fb216c43d923888f0d53d9e3a5965b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:52 -0800 Subject: selftests: mptcp: userspace pm send RM_ADDR for ID 0 This patch adds a selftest for userspace PM to remove id 0 address. Use userspace_pm_add_addr() helper to add an id 10 address, then use userspace_pm_rm_addr() helper to remove id 0 address. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-8-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 812a5ee54158..a98a72e55d2c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3538,6 +3538,32 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3 From bdbef0a6ff10603895b0ba39f56bf874cb2b551a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:53 -0800 Subject: selftests: mptcp: add mptcp_lib_kill_wait To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. Export kill_wait() helper in userspace_pm.sh into mptcp_lib.sh and rename it as mptcp_lib_kill_wait(). It can be used to instead of kill_wait() in mptcp_join.sh. Use the new helper in both scripts. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-9-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 9 +++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 31 ++++++++--------------- 3 files changed, 22 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a98a72e55d2c..e7557f6a0dc1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -682,16 +682,10 @@ wait_mpj() done } -kill_wait() -{ - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - kill_events_pids() { - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid + mptcp_lib_kill_wait $evts_ns1_pid + mptcp_lib_kill_wait $evts_ns2_pid } kill_tests_wait() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 56cbd57abbae..e421b658d748 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -217,3 +217,12 @@ mptcp_lib_get_info_value() { mptcp_lib_evts_get_info() { mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" } + +# $1: PID +mptcp_lib_kill_wait() { + [ "${1}" -eq 0 ] && return 0 + + kill -SIGUSR1 "${1}" > /dev/null 2>&1 + kill "${1}" > /dev/null 2>&1 + wait "${1}" 2>/dev/null +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 2413059a42e5..f4e352494f05 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -108,15 +108,6 @@ test_fail() mptcp_lib_result_fail "${test_name}" } -kill_wait() -{ - [ $1 -eq 0 ] && return 0 - - kill -SIGUSR1 $1 > /dev/null 2>&1 - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - # This function is used in the cleanup trap #shellcheck disable=SC2317 cleanup() @@ -128,7 +119,7 @@ cleanup() for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ $server_evts_pid $client_evts_pid do - kill_wait $pid + mptcp_lib_kill_wait $pid done local netns @@ -210,7 +201,7 @@ make_connection() fi :>"$client_evts" if [ $client_evts_pid -ne 0 ]; then - kill_wait $client_evts_pid + mptcp_lib_kill_wait $client_evts_pid fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & client_evts_pid=$! @@ -219,7 +210,7 @@ make_connection() fi :>"$server_evts" if [ $server_evts_pid -ne 0 ]; then - kill_wait $server_evts_pid + mptcp_lib_kill_wait $server_evts_pid fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & server_evts_pid=$! @@ -624,7 +615,7 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid local sport sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -663,7 +654,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -702,7 +693,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -740,7 +731,7 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -779,7 +770,7 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -816,7 +807,7 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -862,7 +853,7 @@ test_subflows_v4_v6_mix() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -974,7 +965,7 @@ test_listener() sleep 0.5 # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sleep 0.5 verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port -- cgit v1.2.3 From b850f2c7dd85ecd14a333685c4ffd23f12665e94 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:54 -0800 Subject: selftests: mptcp: add mptcp_lib_is_v6 To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. is_v6() helper is defined in mptcp_connect.sh, mptcp_join.sh and mptcp_sockopt.sh, so export it into mptcp_lib.sh and rename it as mptcp_lib_is_v6(). Use this new helper in all scripts. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-10-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 16 +++++----------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 14 ++++---------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 5 +++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 8 +------- 4 files changed, 15 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b1fc8afd072d..4cf62b2b0480 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -310,12 +310,6 @@ check_mptcp_disabled() return 0 } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_ping() { local listener_ns="$1" @@ -324,7 +318,7 @@ do_ping() local ping_args="-q -c 1" local rc=0 - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi @@ -635,12 +629,12 @@ run_tests_lo() fi # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" else local_addr="0.0.0.0" @@ -708,7 +702,7 @@ run_test_transparent() TEST_GROUP="${msg}" # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi @@ -741,7 +735,7 @@ EOF fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" r6flag="-6" else diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e7557f6a0dc1..2ec5c5006d38 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -587,12 +587,6 @@ link_failure() done } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - # $1: ns, $2: port wait_local_port_listen() { @@ -895,7 +889,7 @@ pm_nl_set_endpoint() local id=10 while [ $add_nr_ns1 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::1" else addr="10.0.$counter.1" @@ -947,7 +941,7 @@ pm_nl_set_endpoint() local id=20 while [ $add_nr_ns2 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::2" else addr="10.0.$counter.2" @@ -989,7 +983,7 @@ pm_nl_set_endpoint() pm_nl_flush_endpoint ${connector_ns} elif [ $rm_nr_ns2 -eq 9 ]; then local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:1::2" else addr="10.0.1.2" @@ -3357,7 +3351,7 @@ userspace_pm_rm_sf() local cnt [ "$1" == "$ns2" ] && evts=$evts_ns2 - if is_v6 $2; then ip=6; fi + if mptcp_lib_is_v6 $2; then ip=6; fi tk=$(mptcp_lib_evts_get_info token "$evts") da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) dp=$(mptcp_lib_evts_get_info dport "$evts" $t) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index e421b658d748..447292cad33c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -226,3 +226,8 @@ mptcp_lib_kill_wait() { kill "${1}" > /dev/null 2>&1 wait "${1}" 2>/dev/null } + +# $1: IP address +mptcp_lib_is_v6() { + [ -z "${1##*:*}" ] +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index a817af6616ec..bfa744e350ef 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -161,12 +161,6 @@ check_transfer() return 0 } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_transfer() { local listener_ns="$1" @@ -183,7 +177,7 @@ do_transfer() local mptcp_connect="./mptcp_connect -r 20" local local_addr ip - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" ip=ipv6 else -- cgit v1.2.3 From 61c131f5d4d2b79904af2fdcb2839a9db8e7c55c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:55 -0800 Subject: selftests: mptcp: add mptcp_lib_get_counter To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. The helper get_counter() in mptcp_join.sh and get_mib_counter() in mptcp_connect.sh have the same functionality, export get_counter() into mptcp_lib.sh and rename it as mptcp_lib_get_counter(). Use this new helper instead of get_counter() and get_mib_counter(). Use this helper in test_prio() in userspace_pm.sh too instead of open-coding. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-11-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 41 ++++------ tools/testing/selftests/net/mptcp/mptcp_join.sh | 88 +++++++++------------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 14 ++-- 4 files changed, 73 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4cf62b2b0480..3b971d1617d8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -335,21 +335,6 @@ do_ping() return 0 } -# $1: ns, $2: MIB counter -get_mib_counter() -{ - local listener_ns="${1}" - local mib="${2}" - - # strip the header - ip netns exec "${listener_ns}" \ - nstat -z -a "${mib}" | \ - tail -n+2 | \ - while read a count c rest; do - echo $count - done -} - # $1: ns, $2: port wait_local_port_listen() { @@ -435,12 +420,12 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -503,11 +488,11 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -536,8 +521,8 @@ do_transfer() fi if $checksum; then - local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2ec5c5006d38..a5d66d1bfeb4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -605,25 +605,9 @@ wait_local_port_listen() done } -# $1: ns ; $2: counter -get_counter() -{ - local ns="${1}" - local counter="${2}" - local count - - count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') - if [ -z "${count}" ]; then - mptcp_lib_fail_if_expected_feature "${counter} counter" - return 1 - fi - - echo "${count}" -} - rm_addr_count() { - get_counter "${1}" "MPTcpExtRmAddr" + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -643,7 +627,7 @@ wait_rm_addr() rm_sf_count() { - get_counter "${1}" "MPTcpExtRmSubflow" + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" } # $1: ns, $2: old rm_sf counter in $ns @@ -666,11 +650,11 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done @@ -1272,7 +1256,7 @@ chk_csum_nr() fi print_check "sum" - count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi @@ -1285,7 +1269,7 @@ chk_csum_nr() print_ok fi print_check "csum" - count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi @@ -1329,7 +1313,7 @@ chk_fail_nr() fi print_check "ftx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi @@ -1343,7 +1327,7 @@ chk_fail_nr() fi print_check "failrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi @@ -1376,7 +1360,7 @@ chk_fclose_nr() fi print_check "ctx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then @@ -1387,7 +1371,7 @@ chk_fclose_nr() fi print_check "fclzrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then @@ -1417,7 +1401,7 @@ chk_rst_nr() fi print_check "rtx" - count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1429,7 +1413,7 @@ chk_rst_nr() fi print_check "rstrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1450,7 +1434,7 @@ chk_infi_nr() local count print_check "itx" - count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_tx" ]; then @@ -1460,7 +1444,7 @@ chk_infi_nr() fi print_check "infirx" - count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_rx" ]; then @@ -1489,7 +1473,7 @@ chk_join_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1500,7 +1484,7 @@ chk_join_nr() print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1517,7 +1501,7 @@ chk_join_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1550,8 +1534,8 @@ chk_stale_nr() print_check "stale" - stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") - recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then print_skip elif [ $stale_nr -lt $stale_min ] || @@ -1588,7 +1572,7 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1600,7 +1584,7 @@ chk_add_nr() fi print_check "echo" - count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1611,7 +1595,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1621,7 +1605,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1632,7 +1616,7 @@ chk_add_nr() fi print_check "synack" - count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1643,7 +1627,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1654,7 +1638,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1665,7 +1649,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1687,7 +1671,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add TX" - count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1699,7 +1683,7 @@ chk_add_tx_nr() fi print_check "echo TX" - count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_tx_nr" ]; then @@ -1737,7 +1721,7 @@ chk_rm_nr() fi print_check "rm" - count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_nr" ]; then @@ -1747,13 +1731,13 @@ chk_rm_nr() fi print_check "rmsf" - count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip elif [ -n "$simult" ]; then local cnt suffix - cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable @@ -1782,7 +1766,7 @@ chk_rm_tx_nr() local rm_addr_tx_nr=$1 print_check "rm TX" - count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_tx_nr" ]; then @@ -1799,7 +1783,7 @@ chk_prio_nr() local count print_check "ptx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_tx" ]; then @@ -1809,7 +1793,7 @@ chk_prio_nr() fi print_check "prx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_rx" ]; then @@ -1943,7 +1927,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(get_counter ${ns} "TcpAttemptFails") + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 447292cad33c..718c79dda2b3 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -231,3 +231,19 @@ mptcp_lib_kill_wait() { mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } + +# $1: ns, $2: MIB counter +mptcp_lib_get_counter() { + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index f4e352494f05..f136956f6c95 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -884,9 +884,10 @@ test_prio() # Check TX print_test "MP_PRIO TX" - count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass @@ -894,9 +895,10 @@ test_prio() # Check RX print_test "MP_PRIO RX" - count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass -- cgit v1.2.3 From 119931cc88ce7073b9a289c13abaf2348d55fdfa Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:56 -0800 Subject: selftests: mptcp: add missing oflag=append In mptcp_connect.sh we are missing something like "oflag=append" because this will write "${rem}" bytes at the beginning of the file where there is already some random bytes. It should write that at the end. This patch adds this missing 'oflag=append' flag for 'dd' command in make_file(). Suggested-by: Matthieu Baerts Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-12-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3b971d1617d8..c4f08976c418 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -593,7 +593,7 @@ make_file() rem=$((SIZE - (ksize * 1024))) dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" echo "Created $name (size $(du -b "$name")) containing data sent by $who" -- cgit v1.2.3 From 3a96dea9f88763cfa29e15f681f95fca8952ec77 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:57 -0800 Subject: selftests: mptcp: add mptcp_lib_make_file To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. make_file() helper in mptcp_sockopt.sh and userspace_pm.sh are the same. Export it into mptcp_lib.sh and rename it as mptcp_lib_kill_wait(). Use it in both mptcp_connect.sh and mptcp_join.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-13-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 9 +++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 3 +-- tools/testing/selftests/net/mptcp/userspace_pm.sh | 12 +----------- 5 files changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index c4f08976c418..d20e278480fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -592,9 +592,8 @@ make_file() ksize=$((SIZE / 1024)) rem=$((SIZE - (ksize * 1024))) - dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null + mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" echo "Created $name (size $(du -b "$name")) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a5d66d1bfeb4..37f434cbfa44 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1171,8 +1171,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size print_info "Test file (size $size KB) for $who" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 718c79dda2b3..61be4f29a69a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -247,3 +247,12 @@ mptcp_lib_get_counter() { echo "${count}" } + +mptcp_lib_make_file() { + local name="${1}" + local bs="${2}" + local size="${3}" + + dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index bfa744e350ef..96dc46eda133 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -251,8 +251,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size echo "Created $name (size $size KB) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index f136956f6c95..6167837f48e1 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -164,22 +164,12 @@ print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass -make_file() -{ - # Store a chunk of data in a file to transmit over an MPTCP connection - local name=$1 - local ksize=1 - - dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" -} - make_connection() { if [ -z "$file" ]; then file=$(mktemp) fi - make_file "$file" "client" + mptcp_lib_make_file "$file" 2 1 local is_v6=$1 local app_port=$app4_port -- cgit v1.2.3 From 9d9095bbc24df4432b38fb33a3cf59ea1e9213d0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:58 -0800 Subject: selftests: mptcp: add mptcp_lib_check_transfer To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. check_transfer() and print_file_err() helpers are defined both in mptcp_connect.sh and mptcp_sockopt.sh, export them into mptcp_lib.sh and rename them with mptcp_lib_ prefix. And use them in all scripts. Note: In mptcp_sockopt.sh it is OK to drop 'ret=1' in check_transfer() because it will be set in run_tests() anyway. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-14-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 29 ++-------------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 24 ++++++++++++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 28 +-------------------- 4 files changed, 29 insertions(+), 63 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index d20e278480fb..537f180aa51e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -254,31 +254,6 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 -} - check_mptcp_disabled() { local disabled_ns="ns_disabled-$rndh" @@ -483,9 +458,9 @@ do_transfer() return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 37f434cbfa44..420e4b3f9ad0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -511,13 +511,6 @@ get_failed_tests_ids() done | sort -n } -print_file_err() -{ - ls -l "$1" 1>&2 - echo -n "Trailing bytes are: " - tail -c 27 "$1" -} - check_transfer() { local in=$1 @@ -548,8 +541,8 @@ check_transfer() local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then fail_test "$what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" return 1 else diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 61be4f29a69a..86f4003ab6f6 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -256,3 +256,27 @@ mptcp_lib_make_file() { dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" } + +# $1: file +mptcp_lib_print_file_err() { + ls -l "${1}" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "${1}" +} + +# $1: input file ; $2: output file ; $3: what kind of file +mptcp_lib_check_transfer() { + local in="${1}" + local out="${2}" + local what="${3}" + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + echo "[ FAIL ] $what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + fi + + return 0 +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 96dc46eda133..c643872ddf47 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -135,32 +135,6 @@ check_mark() return 0 } -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - ret=1 - - return 1 - fi - - return 0 -} - do_transfer() { local listener_ns="$1" @@ -232,7 +206,7 @@ do_transfer() check_mark $connector_ns 4 || retc=1 fi - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? mptcp_lib_result_code "${retc}" "mark ${ip}" -- cgit v1.2.3 From 9369777c29395730cec967e7d0f48aed872b7110 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:59 -0800 Subject: selftests: mptcp: add mptcp_lib_wait_local_port_listen To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. wait_local_port_listen() helper is defined in diag.sh, mptcp_connect.sh, mptcp_join.sh and simult_flows.sh, export it into mptcp_lib.sh and rename it with mptcp_lib_ prefix. Use this new helper in all these scripts. Note: We only have IPv4 connections in this helper, not looking at IPv6 (tcp6) but that's OK because we only have IPv4 connections here in diag.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-15-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 23 +++------------------- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 19 +----------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 20 +------------------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 18 +++++++++++++++++ tools/testing/selftests/net/mptcp/simult_flows.sh | 19 +----------------- 5 files changed, 24 insertions(+), 75 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 85a8ee9395b3..95b498efacd1 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -182,23 +182,6 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - wait_connected() { local listener_ns="${1}" @@ -222,7 +205,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10000 +mptcp_lib_wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" chk_msk_listen 10000 @@ -245,7 +228,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10001 +mptcp_lib_wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ @@ -266,7 +249,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -wait_local_port_listen $ns $((NR_CLIENTS + 10001)) +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 537f180aa51e..7898d62fce0b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -310,23 +310,6 @@ do_ping() return 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -408,7 +391,7 @@ do_transfer() $extra_args $local_addr < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 420e4b3f9ad0..8362ea454af3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -580,24 +580,6 @@ link_failure() done } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local i - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - rm_addr_count() { mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" @@ -1082,7 +1064,7 @@ do_transfer() fi local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 86f4003ab6f6..022262a2cfe0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -280,3 +280,21 @@ mptcp_lib_check_transfer() { return 0 } + +# $1: ns, $2: port +mptcp_lib_wait_local_port_listen() { + local listener_ns="${1}" + local port="${2}" + + local port_hex + port_hex="$(printf "%04X" "${port}")" + + local _ + for _ in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ + {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ce9203b817f8..ae8ad5d6fb9d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -123,23 +123,6 @@ setup() grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local cin=$1 @@ -179,7 +162,7 @@ do_transfer() 0.0.0.0 < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${ns3}" "${port}" + mptcp_lib_wait_local_port_listen "${ns3}" "${port}" timeout ${timeout_test} \ ip netns exec ${ns1} \ -- cgit v1.2.3 From e1c0b9ef26e5e46fd5c2df9e7f9686e786723f53 Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 21:57:26 -0600 Subject: selftests:breakpoints: Fix Format String Warning in breakpoint_test This commit resolves a compiler warning regardingthe use of non-literal format strings in breakpoint_test.c. The functions `ksft_test_result_pass` and `ksft_test_result_fail` were previously called with a variable `msg` directly, which could potentially lead to format string vulnerabilities. Changes made: - Modified the calls to `ksft_test_result_pass` and `ksft_test_result_fail` by adding a "%s" format specifier. This explicitly declares `msg` as a string argument, adhering to safer coding practices and resolving the compiler warning. This change does not affect the functional behavior of the code but ensures better code safety and compliance with recommended C programming standards. The previous warning is "breakpoint_test.c:287:17: warning: format not a string literal and no format arguments [-Wformat-security] 287 | ksft_test_result_pass(msg); | ^~~~~~~~~~~~~~~~~~~~~ breakpoint_test.c:289:17: warning: format not a string literal and no format arguments [-Wformat-security] 289 | ksft_test_result_fail(msg); | " Signed-off-by: angquan yu Signed-off-by: Shuah Khan --- tools/testing/selftests/breakpoints/breakpoint_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c index 3266cc9293fe..d46962a24724 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -284,9 +284,9 @@ static void check_success(const char *msg) nr_tests++; if (ret) - ksft_test_result_pass(msg); + ksft_test_result_pass("%s", msg); else - ksft_test_result_fail(msg); + ksft_test_result_fail("%s", msg); } static void launch_instruction_breakpoints(char *buf, int local, int global) -- cgit v1.2.3 From 5e551899788b0731761052f21febdfef668e511f Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 15:48:54 -0600 Subject: selftests/breakpoints: Fix format specifier in ksft_print_msg in step_after_suspend_test.c In the function 'tools/testing/selftests/breakpoints/run_test' within step_after_suspend_test.c, the ksft_print_msg function call incorrectly used '$s' as a format specifier. This commit corrects this typo to use the proper '%s' format specifier, ensuring the error message from waitpid() is correctly displayed. The issue manifested as a compilation warning (too many arguments for format [-Wformat-extra-args]), potentially obscuring actual runtime errors and complicating debugging processes. This fix enhances the clarity of error messages during test failures and ensures compliance with standard C format string conventions. Signed-off-by: angquan yu Signed-off-by: Shuah Khan --- tools/testing/selftests/breakpoints/step_after_suspend_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 2cf6f10ab7c4..b8703c499d28 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -89,7 +89,7 @@ int run_test(int cpu) wpid = waitpid(pid, &status, __WALL); if (wpid != pid) { - ksft_print_msg("waitpid() failed: $s\n", strerror(errno)); + ksft_print_msg("waitpid() failed: %s\n", strerror(errno)); return KSFT_FAIL; } if (WIFEXITED(status)) { -- cgit v1.2.3 From 9686e7f59b142095ac6ad0763312ec68fc34c51a Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 21:36:15 -0600 Subject: selftests:x86: Fix Format String Warnings in lam.c This commit addresses compiler warnings in lam.c related to the usage of non-literal format strings without format arguments in the 'run_test' function. Warnings fixed: - Resolved warnings indicating that 'ksft_test_result_skip' and 'ksft_test_result' were called with 't->msg' as a format string without accompanying format arguments. Changes made: - Modified the calls to 'ksft_test_result_skip' and 'ksft_test_result' to explicitly include a format specifier ("%s") for 't->msg'. - This ensures that the string is safely treated as a format argument, adhering to safer coding practices and resolving the compiler warnings. Signed-off-by: angquan yu Signed-off-by: Shuah Khan --- tools/testing/selftests/x86/lam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 8f9b06d9ce03..215b8150b7cc 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -817,7 +817,7 @@ static void run_test(struct testcases *test, int count) /* return 3 is not support LA57, the case should be skipped */ if (ret == 3) { - ksft_test_result_skip(t->msg); + ksft_test_result_skip("%s", t->msg); continue; } @@ -826,7 +826,7 @@ static void run_test(struct testcases *test, int count) else ret = !(t->expected); - ksft_test_result(ret, t->msg); + ksft_test_result(ret, "%s", t->msg); } } -- cgit v1.2.3 From 60e76e7ac088c5146d647cc5cc3f345b54489915 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Nov 2023 10:45:47 +0000 Subject: kselftest/vDSO: Make test name reporting for vdso_abi_test tooling friendly The test results from vdso_abi_test are all formatted using a series of macros: #define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ #define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x #define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x which don't play nicely with automated KTAP parsers since the actual KTAP lines are in the form ok 1 with no test name and we get an additional log line such as vdso_test_gettimeofday(): PASS with no preceeding # as KTAP requires. The lack of a test name means that many automation systems will have a hard time distinguishing between the different tests or correlating results between runs, the lack of # is less severe but could potentially cause confusion. Fix these issues by rewriting all the result reporting to include both the vDSO function name being tested and (where there is one) the name of the clock being tested in the main KTAP line. Since we have tests both with and without a specific clock we abandon the helper macros and just put the format strings used directly in the ksft_ API calls. When we fail to look up the relevant vDSO symbol we add a separate print statement explaining why the skip is being done. This gives output such as: ok 1 __vdso_gettimeofday # clock_id: CLOCK_REALTIME # The time is 1700673118.58091596 ok 2 __vdso_clock_gettime CLOCK_REALTIME which is much easier for test automation to work with. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_abi.c | 66 +++++++++++++++------------- 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 883ca85424bc..b304abae6e8f 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -33,9 +33,20 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ -#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x -#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x +const char *vdso_clock_name[12] = { + "CLOCK_REALTIME", + "CLOCK_MONOTONIC", + "CLOCK_PROCESS_CPUTIME_ID", + "CLOCK_THREAD_CPUTIME_ID", + "CLOCK_MONOTONIC_RAW", + "CLOCK_REALTIME_COARSE", + "CLOCK_MONOTONIC_COARSE", + "CLOCK_BOOTTIME", + "CLOCK_REALTIME_ALARM", + "CLOCK_BOOTTIME_ALARM", + "CLOCK_SGI_CYCLE", + "CLOCK_TAI", +}; static void vdso_test_gettimeofday(void) { @@ -44,7 +55,8 @@ static void vdso_test_gettimeofday(void) (vdso_gettimeofday_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0])); + ksft_print_msg("Couldn't find %s\n", name[0]); + ksft_test_result_skip("%s\n", name[0]); return; } @@ -54,9 +66,9 @@ static void vdso_test_gettimeofday(void) if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", (long long)tv.tv_sec, (long long)tv.tv_usec); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s\n", name[0]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0])); + ksft_test_result_fail("%s\n", name[0]); } } @@ -67,7 +79,9 @@ static void vdso_test_clock_gettime(clockid_t clk_id) (vdso_clock_gettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1])); + ksft_print_msg("Couldn't find %s\n", name[1]); + ksft_test_result_skip("%s %s\n", name[1], + vdso_clock_name[clk_id]); return; } @@ -77,9 +91,11 @@ static void vdso_test_clock_gettime(clockid_t clk_id) if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", (long long)ts.tv_sec, (long long)ts.tv_nsec); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s %s\n", name[1], + vdso_clock_name[clk_id]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1])); + ksft_test_result_fail("%s %s\n", name[1], + vdso_clock_name[clk_id]); } } @@ -90,7 +106,8 @@ static void vdso_test_time(void) (vdso_time_t)vdso_sym(version, name[2]); if (!vdso_time) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2])); + ksft_print_msg("Couldn't find %s\n", name[2]); + ksft_test_result_skip("%s\n", name[2]); return; } @@ -99,9 +116,9 @@ static void vdso_test_time(void) if (ret > 0) { ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", (long long)(ret / 3600)); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s\n", name[2]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2])); + ksft_test_result_fail("%s\n", name[2]); } } @@ -114,7 +131,9 @@ static void vdso_test_clock_getres(clockid_t clk_id) (vdso_clock_getres_t)vdso_sym(version, name[3]); if (!vdso_clock_getres) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3])); + ksft_print_msg("Couldn't find %s\n", name[3]); + ksft_test_result_skip("%s %s\n", name[3], + vdso_clock_name[clk_id]); return; } @@ -137,27 +156,14 @@ static void vdso_test_clock_getres(clockid_t clk_id) clock_getres_fail++; if (clock_getres_fail > 0) { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3])); + ksft_test_result_fail("%s %s\n", name[3], + vdso_clock_name[clk_id]); } else { - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s %s\n", name[3], + vdso_clock_name[clk_id]); } } -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", -}; - /* * This function calls vdso_test_clock_gettime and vdso_test_clock_getres * with different values for clock_id. -- cgit v1.2.3 From e63e1354125f923f1f5a393dd63c074427382e7e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Nov 2023 10:45:48 +0000 Subject: kselftest/vDSO: Fix message formatting for clock_id logging When logging the ID of the currently tested clock vdso_test_clock() puts a spurious newline at the start of the format string resulting in output such as # clock_id: CLOCK_BOOTTIME which is a valid but empty KTAP informational message followed by a non conferment output line. Remove the initial newline to create a more KTAP friendly # clock_id: CLOCK_BOOTTIME Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_abi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index b304abae6e8f..d0e247cca58a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -170,7 +170,7 @@ static void vdso_test_clock_getres(clockid_t clk_id) */ static inline void vdso_test_clock(clockid_t clock_id) { - ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]); + ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]); vdso_test_clock_gettime(clock_id); -- cgit v1.2.3 From 25cfe960a858dd345176253088a8e56538007c22 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Nov 2023 10:45:49 +0000 Subject: kselftest/vDSO: Use ksft_print_msg() rather than printf in vdso_test_abi There are a couple of raw printf() calls in vdso_test_abi which result in non KTAP conforment output such as [vDSO kselftest] VDSO_VERSION: LINUX_2.6 Convert them to use ksft_print_msg() so that they don't cause confusion for parsers. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_abi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index d0e247cca58a..96d32fd65b42 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -187,14 +187,14 @@ int main(int argc, char **argv) ksft_set_plan(VDSO_TEST_PLAN); if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); + ksft_print_msg("AT_SYSINFO_EHDR is not present!\n"); return KSFT_SKIP; } version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; - printf("[vDSO kselftest] VDSO_VERSION: %s\n", version); + ksft_print_msg("[vDSO kselftest] VDSO_VERSION: %s\n", version); vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); -- cgit v1.2.3 From af76b2dec0984a079d8497bfa37d29a9b55932e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Nov 2023 14:11:45 -0300 Subject: libapi: Add missing linux/types.h header to get the __u64 type on io.h There are functions using __u64, so we need to have the linux/types.h header otherwise we'll break when its not included before api/io.h. Fixes: e95770af4c4a280f ("tools api: Add a lightweight buffered reading api") Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZWjDPL+IzPPsuC3X@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/io.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index a77b74c5fb65..2a7fe9758813 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -12,6 +12,7 @@ #include #include #include +#include struct io { /* File descriptor being read/ */ -- cgit v1.2.3 From 366efbff58092fac48421fa34018bb34c326088e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:14 -0800 Subject: libperf: Lazily allocate/size mmap event copy The event copy in the mmap is used to have storage to read an event. Not all users of mmaps read the events, such as perf record. The amount of buffer was also statically set to PERF_SAMPLE_MAX_SIZE rather than the amount necessary from the header's event size. Switch to a model where the event_copy is reallocated if too small to the event's size. This adds the potential for the event to move, so if a copy of the event pointer were stored it could be broken. All the current users do: while(event = perf_mmap__read_event()) { ... } and so they would be broken due to the event being overwritten if they had stored the pointer. Manual inspection and address sanitizer testing also shows the event pointer not being stored. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-3-irogers@google.com [ Replace two lines with equivalent zfree(&map->event_copy) ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/mmap.h | 3 ++- tools/lib/perf/mmap.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5a062af8e9d8..5f08cab61ece 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -33,7 +33,8 @@ struct perf_mmap { bool overwrite; u64 flush; libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + void *event_copy; + size_t event_copy_sz; struct perf_mmap *next; }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 2184814b37dd..0c903c2372c9 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -19,6 +19,7 @@ void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb) { + /* Assume fields were zero initialized. */ map->fd = -1; map->overwrite = overwrite; map->unmap_cb = unmap_cb; @@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, void perf_mmap__munmap(struct perf_mmap *map) { - if (map && map->base != NULL) { + if (!map) + return; + + zfree(&map->event_copy); + map->event_copy_sz = 0; + if (map->base) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; map->fd = -1; refcount_set(&map->refcnt, 0); } - if (map && map->unmap_cb) + if (map->unmap_cb) map->unmap_cb(map); } @@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map, */ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; + unsigned int len = size, cpy; void *dst = map->event_copy; + if (size > map->event_copy_sz) { + dst = realloc(map->event_copy, size); + if (!dst) + return NULL; + map->event_copy = dst; + map->event_copy_sz = size; + } + do { cpy = min(map->mask + 1 - (offset & map->mask), len); memcpy(dst, &data[offset & map->mask], cpy); -- cgit v1.2.3 From b6a15269cee255dc5fe75c249c701e3956d892cb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:16 -0800 Subject: tools api fs: Switch filename__read_str to use io.h filename__read_str() has its own string reading code that allocates memory before reading into it. The memory allocated is sized at BUFSIZ that is 8kb. Most strings are short and so most of this 8kb is wasted. Refactor io__getline(), as io__getdelim(), so that the newline character can be configurable and ignored in the case of filename__read_str(). Code like build_caches_for_cpu() in perf's header.c will read many strings and hold them in a data structure, in this case multiple strings per cache level per CPU. Using io.h's io__getline() avoids the wasted memory as strings are temporarily read into a buffer on the stack before being copied to a buffer that grows 128 bytes at a time and is never sized larger than the string. For a 16 hyperthread system the memory consumption of "perf record true" is reduced by 180kb, primarily through saving memory when reading the cache information. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 56 +++++++++++++-------------------------------------- tools/lib/api/io.h | 11 +++++++--- 2 files changed, 22 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 5cb0eeec2c8a..004f2af5504b 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -16,6 +16,7 @@ #include #include "fs.h" +#include "../io.h" #include "debug-internal.h" #define _STR(x) #x @@ -344,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value) return filename__read_ull_base(filename, value, 0); } -#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ - int filename__read_str(const char *filename, char **buf, size_t *sizep) { - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; + struct io io; + char bf[128]; + int err; - fd = open(filename, O_RDONLY); - if (fd < 0) + io.fd = open(filename, O_RDONLY); + if (io.fd < 0) return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warn("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; + io__init(&io, io.fd, bf, sizeof(bf)); + *buf = NULL; + err = io__getdelim(&io, buf, sizep, /*delim=*/-1); + if (err < 0) { + free(*buf); + *buf = NULL; } else - free(bf); - - close(fd); + err = 0; + close(io.fd); return err; } diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index 2a7fe9758813..84adf8102018 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -141,8 +141,8 @@ static inline int io__get_dec(struct io *io, __u64 *dec) } } -/* Read up to and including the first newline following the pattern of getline. */ -static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +/* Read up to and including the first delim. */ +static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim) { char buf[128]; int buf_pos = 0; @@ -152,7 +152,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l /* TODO: reuse previously allocated memory. */ free(*line_out); - while (ch != '\n') { + while (ch != delim) { ch = io__get_char(io); if (ch < 0) @@ -185,4 +185,9 @@ err_out: return -ENOMEM; } +static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out) +{ + return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n'); +} + #endif /* __API_IO__ */ -- cgit v1.2.3 From f8846a1a3c54a53f1c30836a2b7143cfa5da223d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:17 -0800 Subject: tools api fs: Avoid reading whole file for a 1 byte bool sysfs__read_bool() used the first byte from a fully read file into a string. It then looked at the first byte's value. Avoid doing this and just read the first byte. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 004f2af5504b..337fde770e45 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -447,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep) int sysfs__read_bool(const char *entry, bool *value) { - char *buf; - size_t size; - int ret; + struct io io; + char bf[16]; + int ret = 0; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; - ret = sysfs__read_str(entry, &buf, &size); - if (ret < 0) - return ret; + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + io.fd = open(path, O_RDONLY); + if (io.fd < 0) + return -errno; - switch (buf[0]) { + io__init(&io, io.fd, bf, sizeof(bf)); + switch (io__get_char(&io)) { case '1': case 'y': case 'Y': @@ -469,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value) default: ret = -1; } - - free(buf); + close(io.fd); return ret; } -- cgit v1.2.3 From d837813ff42ef86dac8596dd491bf6869ac7a0e8 Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Sun, 20 Aug 2023 19:13:54 +0500 Subject: selftests: prctl: Add prctl test for PR_GET_NAME This patch covers the testing of PR_GET_NAME by reading it's value from proc/self/task/pid/comm and matching it with the value returned by PR_GET_NAME. If the values are matched then it's successful, otherwise it fails. changes since v1: - Handled fscanf,fopen error checking. - Defined MAX_PATH_LEN. Signed-off-by: Osama Muhammad Signed-off-by: Shuah Khan --- tools/testing/selftests/prctl/set-process-name.c | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c index 3bc5e0e09eb9..562f707ba771 100644 --- a/tools/testing/selftests/prctl/set-process-name.c +++ b/tools/testing/selftests/prctl/set-process-name.c @@ -12,6 +12,7 @@ #define CHANGE_NAME "changename" #define EMPTY_NAME "" #define TASK_COMM_LEN 16 +#define MAX_PATH_LEN 50 int set_name(char *name) { @@ -47,6 +48,35 @@ int check_null_pointer(char *check_name) return res; } +int check_name(void) +{ + + int pid; + + pid = getpid(); + FILE *fptr = NULL; + char path[MAX_PATH_LEN] = {}; + char name[TASK_COMM_LEN] = {}; + char output[TASK_COMM_LEN] = {}; + int j; + + j = snprintf(path, MAX_PATH_LEN, "/proc/self/task/%d/comm", pid); + fptr = fopen(path, "r"); + if (!fptr) + return -EIO; + + fscanf(fptr, "%s", output); + if (ferror(fptr)) + return -EIO; + + int res = prctl(PR_GET_NAME, name, NULL, NULL, NULL); + + if (res < 0) + return -errno; + + return !strcmp(output, name); +} + TEST(rename_process) { EXPECT_GE(set_name(CHANGE_NAME), 0); @@ -57,6 +87,8 @@ TEST(rename_process) { EXPECT_GE(set_name(CHANGE_NAME), 0); EXPECT_LT(check_null_pointer(CHANGE_NAME), 0); + + EXPECT_TRUE(check_name()); } TEST_HARNESS_MAIN -- cgit v1.2.3 From 49360d978411eeaeffb96938edb53ee75ba471bc Mon Sep 17 00:00:00 2001 From: Swarup Laxman Kotiaklapudi Date: Sat, 11 Nov 2023 23:08:06 +0530 Subject: selftests: capabilities: namespace create varies for root and normal user This patchset fixes TODO: "If we're already root, we could skip creating the userns." Change namespace creation for root and non-root user differently in create_and_enter_ns() function in this file: tools/testing/selftests/capabilities/test_execve.c Test result with root user: $sudo make TARGETS="capabilities" kselftest ... TAP version 13 1..1 timeout set to 45 selftests: capabilities: test_execve TAP version 13 1..12 [RUN] +++ Tests with uid == 0 +++ [NOTE] Using global UIDs for tests [RUN] Root => ep ... ok 12 Passed Totals: pass:12 fail:0 xfail:0 xpass:0 skip:0 error:0 ================================================== TAP version 13 1..9 [RUN] +++ Tests with uid != 0 +++ [NOTE] Using global UIDs for tests [RUN] Non-root => no caps ... ok 9 Passed Totals: pass:9 fail:0 xfail:0 xpass:0 skip:0 error:0 Test result without root or normal user: $make TARGETS="capabilities" kselftest ... timeout set to 45 selftests: capabilities: test_execve TAP version 13 1..12 [RUN] +++ Tests with uid == 0 +++ [NOTE] Using a user namespace for tests [RUN] Root => ep validate_cap:: Capabilities after execve were correct ok 1 Passed Check cap_ambient manipulation rules ok 2 PR_CAP_AMBIENT_RAISE failed on non-inheritable cap ok 3 PR_CAP_AMBIENT_RAISE failed on non-permitted cap ok 4 PR_CAP_AMBIENT_RAISE worked ok 5 Basic manipulation appears to work [RUN] Root +i => eip validate_cap:: Capabilities after execve were correct ok 6 Passed [RUN] UID 0 +ia => eipa validate_cap:: Capabilities after execve were correct ok 7 Passed ok 8 # SKIP SUID/SGID tests (needs privilege) Planned tests != run tests (12 != 8) Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0 ================================================== TAP version 13 1..9 [RUN] +++ Tests with uid != 0 +++ [NOTE] Using a user namespace for tests [RUN] Non-root => no caps validate_cap:: Capabilities after execve were correct ok 1 Passed Check cap_ambient manipulation rules ok 2 PR_CAP_AMBIENT_RAISE failed on non-inheritable cap ok 3 PR_CAP_AMBIENT_RAISE failed on non-permitted cap ok 4 PR_CAP_AMBIENT_RAISE worked ok 5 Basic manipulation appears to work [RUN] Non-root +i => i validate_cap:: Capabilities after execve were correct ok 6 Passed [RUN] UID 1 +ia => eipa validate_cap:: Capabilities after execve were correct ok 7 Passed ok 8 # SKIP SUID/SGID tests (needs privilege) Planned tests != run tests (9 != 8) Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0 Signed-off-by: Swarup Laxman Kotiaklapudi Signed-off-by: Shuah Khan --- tools/testing/selftests/capabilities/test_execve.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index e3a352b020a7..7cde07a5df78 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -88,11 +88,7 @@ static bool create_and_enter_ns(uid_t inner_uid) outer_uid = getuid(); outer_gid = getgid(); - /* - * TODO: If we're already root, we could skip creating the userns. - */ - - if (unshare(CLONE_NEWNS) == 0) { + if (outer_uid == 0 && unshare(CLONE_NEWNS) == 0) { ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n"); if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n", -- cgit v1.2.3 From 130a83879954a9fed35cf4474d223b4fcfd479fa Mon Sep 17 00:00:00 2001 From: Atul Kumar Pant Date: Mon, 6 Nov 2023 23:40:05 +0530 Subject: selftests: sched: Remove initialization to 0 for a static variable Fixes following checkpatch.pl issue: ERROR: do not initialise statics to 0 Signed-off-by: Atul Kumar Pant Signed-off-by: Shuah Khan --- tools/testing/selftests/sched/cs_prctl_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 3e1619b6bf2d..7ba057154343 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -72,7 +72,7 @@ struct child_args { static struct child_args procs[MAX_PROCESSES]; static int num_processes = 2; -static int need_cleanup = 0; +static int need_cleanup; static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) -- cgit v1.2.3 From e8c780a5706066eba210e4c514968d95ba29e052 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Nov 2023 20:14:27 -0800 Subject: docs: netlink: link to family documentations from spec info To increase the chances of people finding the rendered docs add a link to specs.rst and index.rst. Add a label in the generated index.rst and while at it adjust the title a little bit. Reviewed-by: Breno Leitao Reviewed-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231129041427.2763074-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/index.rst | 4 +++- Documentation/userspace-api/netlink/specs.rst | 2 +- tools/net/ynl/ynl-gen-rst.py | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst index 62725dafbbdb..c1b6765cc963 100644 --- a/Documentation/userspace-api/netlink/index.rst +++ b/Documentation/userspace-api/netlink/index.rst @@ -16,4 +16,6 @@ Netlink documentation for users. genetlink-legacy netlink-raw -See also :ref:`Documentation/core-api/netlink.rst `. +See also: + - :ref:`Documentation/core-api/netlink.rst ` + - :ref:`Documentation/networking/netlink_spec/index.rst ` diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index c1b951649113..1b50d97d8d7c 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -15,7 +15,7 @@ kernel headers directly. Internally kernel uses the YAML specs to generate: - the C uAPI header - - documentation of the protocol as a ReST file + - documentation of the protocol as a ReST file - see :ref:`Documentation/networking/netlink_spec/index.rst ` - policy tables for input attribute validation - operation tables diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index b6292109e236..8c62e040df5d 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -122,6 +122,11 @@ def rst_toctree(maxdepth: int = 2) -> str: return "\n".join(lines) +def rst_label(title: str) -> str: + """Return a formatted label""" + return f".. _{title}:\n\n" + + # Parsers # ======= @@ -349,7 +354,8 @@ def generate_main_index_rst(output: str) -> None: lines = [] lines.append(rst_header()) - lines.append(rst_title("Netlink Specification")) + lines.append(rst_label("specs")) + lines.append(rst_title("Netlink Family Specifications")) lines.append(rst_toctree(1)) index_dir = os.path.dirname(output) -- cgit v1.2.3 From f7580f00cc6e5bf57256d61c223d3ac6b4001ae7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:21 -0300 Subject: selftests: tc-testing: remove spurious nsPlugin usage Tests using DEV2 should not be run in a dedicated net namespace, and in parallel, as this device cannot be shared. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 361235ad574b..4598f1d330fe 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -48,9 +48,6 @@ "filter", "flower" ], - "plugins": { - "requires": "nsPlugin" - }, "setup": [ "$TC qdisc add dev $DEV2 ingress", "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" @@ -72,9 +69,6 @@ "filter", "flower" ], - "plugins": { - "requires": "nsPlugin" - }, "setup": [ "$TC qdisc add dev $DEV2 ingress", "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" -- cgit v1.2.3 From 74f7e7eeb1d2c1b00f1a8fa1a6666a509f274da8 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:22 -0300 Subject: selftests: tc-testing: remove spurious './' from Makefile Patchwork CI didn't like the extra './', so remove it. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index e8b3dde4fa16..9153e3428a77 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS += ./tdc.sh +TEST_PROGS += tdc.sh TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk -- cgit v1.2.3 From 7de8b2efafeb770e3f2113d506c31be7f4c9618e Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:23 -0300 Subject: selftests: tc-testing: rename concurrency.json to flower.json All tests in this file pertain to flower, so name it appropriately Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/filters/concurrency.json | 177 --------------------- .../tc-testing/tc-tests/filters/flower.json | 177 +++++++++++++++++++++ 2 files changed, 177 insertions(+), 177 deletions(-) delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/flower.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json deleted file mode 100644 index c2a433a4737e..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json +++ /dev/null @@ -1,177 +0,0 @@ -[ - { - "id": "e41d", - "name": "Add 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 add" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1000000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "6f52", - "name": "Delete 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "c9da", - "name": "Replace 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1000000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "14be", - "name": "Concurrently replace same range of 100k flower filters from 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "100000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "0c44", - "name": "Concurrently delete same range of 100k flower filters from 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"", - "expExitCode": "123", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "ab62", - "name": "Add and delete from same tp with 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 5 add", - "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", - "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add", - "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "500000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "6e8f", - "name": "Replace and delete from same tp with 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add", - "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", - "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace", - "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "500000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json new file mode 100644 index 000000000000..c2a433a4737e --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json @@ -0,0 +1,177 @@ +[ + { + "id": "e41d", + "name": "Add 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 add" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1000000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "6f52", + "name": "Delete 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "c9da", + "name": "Replace 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1000000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "14be", + "name": "Concurrently replace same range of 100k flower filters from 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "100000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "0c44", + "name": "Concurrently delete same range of 100k flower filters from 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"", + "expExitCode": "123", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "ab62", + "name": "Add and delete from same tp with 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 5 add", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", + "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add", + "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "500000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "6e8f", + "name": "Replace and delete from same tp with 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", + "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace", + "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "500000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + } +] -- cgit v1.2.3 From 0fbb5a54f9416953fa8a3857425a62ea6b1efd5c Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:24 -0300 Subject: selftests: tc-testing: remove filters/tests.json Remove this generic file and move the tests to their appropriate files Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/filters/flower.json | 98 ++++++++++++++++ .../tc-testing/tc-tests/filters/matchall.json | 23 ++++ .../tc-testing/tc-tests/filters/tests.json | 123 --------------------- 3 files changed, 121 insertions(+), 123 deletions(-) delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/tests.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json index c2a433a4737e..6b08c0642069 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json @@ -173,5 +173,103 @@ "$TC qdisc del dev $DEV2 ingress", "/bin/rm -rf $BATCH_DIR" ] + }, + { + "id": "2ff3", + "name": "Add flower with max handle and then dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] + }, + { + "id": "4cbd", + "name": "Try to add filter with duplicate key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", + "expExitCode": "2", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "7c65", + "name": "Add flower filter and then terse dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d45e", + "name": "Add flower filter and verify that terse dump doesn't output filter key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": " dst_mac e4:11:22:11:4a:51", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json index afa1b9b0c856..f8d28c415bc3 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ -480,5 +480,28 @@ "$TC qdisc del dev $DUMMY ingress", "$TC actions del action police index 199" ] + }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json deleted file mode 100644 index 4598f1d330fe..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ /dev/null @@ -1,123 +0,0 @@ -[ - { - "id": "2638", - "name": "Add matchall and try to get it", - "category": [ - "filter", - "matchall" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 clsact", - "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" - ], - "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 ingress", - "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 clsact" - ] - }, - { - "id": "2ff3", - "name": "Add flower with max handle and then dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d052", - "name": "Add 1M filters with the same action", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" - ], - "cmdUnderTest": "$TC -b $BATCH_FILE", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm $BATCH_FILE" - ] - }, - { - "id": "4cbd", - "name": "Try to add filter with duplicate key", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", - "expExitCode": "2", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "7c65", - "name": "Add flower filter and then terse dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d45e", - "name": "Add flower filter and verify that terse dump doesn't output filter key", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": " dst_mac e4:11:22:11:4a:51", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - } -] -- cgit v1.2.3 From b6a3451e0847d5d70fb5fa2b2a80ab9f80bf2c7b Mon Sep 17 00:00:00 2001 From: Jeroen van Ingen Schenau Date: Thu, 30 Nov 2023 13:03:53 +0100 Subject: selftests/bpf: Fix erroneous bitmask operation xdp_synproxy_kern.c is a BPF program that generates SYN cookies on allowed TCP ports and sends SYNACKs to clients, accelerating synproxy iptables module. Fix the bitmask operation when checking the status of an existing conntrack entry within tcp_lookup() function. Do not AND with the bit position number, but with the bitmask value to check whether the entry found has the IPS_CONFIRMED flag set. Fixes: fb5cd0ce70d4 ("selftests/bpf: Add selftests for raw syncookie helpers") Signed-off-by: Jeroen van Ingen Schenau Signed-off-by: Daniel Borkmann Tested-by: Minh Le Hoang Link: https://lore.kernel.org/xdp-newbies/CAAi1gX7owA+Tcxq-titC-h-KPM7Ri-6ZhTNMhrnPq5gmYYwKow@mail.gmail.com/T/#u Link: https://lore.kernel.org/bpf/20231130120353.3084-1-jeroen.vaningenschenau@novoserve.com --- tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 80f620602d50..518329c666e9 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -467,13 +467,13 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo unsigned long status = ct->status; bpf_ct_release(ct); - if (status & IPS_CONFIRMED_BIT) + if (status & IPS_CONFIRMED) return XDP_PASS; } else if (ct_lookup_opts.error != -ENOENT) { return XDP_ABORTED; } - /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + /* error == -ENOENT || !(status & IPS_CONFIRMED) */ return XDP_TX; } -- cgit v1.2.3 From 1af3bf2befc07c7198100949dd1bece02a7dbded Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 14:49:13 -0800 Subject: KVM: selftests: Fix MWAIT error message when guest assertion fails Print out the test and vector as intended when a guest assert fails an assertion regarding MONITOR/MWAIT faulting. Unfortunately, the guest printf support doesn't detect such issues at compile-time, so the bug manifests as a confusing error message, e.g. in the most confusing case, the test complains that it got vector "0" instead of expected vector "0". Fixes: 0f52e4aaa614 ("KVM: selftests: Convert the MONITOR/MWAIT test to use printf guest asserts") Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20231107182159.404770-1-seanjc@google.com Link: https://lore.kernel.org/r/20231129224916.532431-2-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 80aa3d8b18f8..853802641e1e 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -27,10 +27,12 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \ + "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \ + "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ } while (0) static void guest_monitor_wait(int testcase) -- cgit v1.2.3 From 4d53dcc5d0bc2c445e29cb14df6d2cf93091731f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 14:49:14 -0800 Subject: KVM: selftests: Fix benign %llx vs. %lx issues in guest asserts Convert %llx to %lx as appropriate in guest asserts. The guest printf implementation treats them the same as KVM selftests are 64-bit only, but strictly adhering to the correct format will allow annotating the underlying helpers with __printf() without introducing new warnings in the build. Link: https://lore.kernel.org/r/20231129224916.532431-3-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/set_memory_region_test.c | 6 +++--- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 +- tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c | 2 +- tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c | 4 ++-- tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c | 2 +- tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c | 8 ++++---- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 6637a0845acf..03ec7efd19aa 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -157,17 +157,17 @@ static void guest_code_move_memory_region(void) */ val = guest_spin_on_val(0); __GUEST_ASSERT(val == 1 || val == MMIO_VAL, - "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the misaligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); __GUEST_ASSERT(val == 1 || val == 0, - "Expected '0' or '1' (no MMIO), got '%llx'", val); + "Expected '0' or '1' (no MMIO), got '%lx'", val); /* Spin until the memory region starts to get re-aligned. */ val = guest_spin_on_val(0); __GUEST_ASSERT(val == 1 || val == MMIO_VAL, - "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 9f28aa276c4e..4bb63b6ee4a0 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -66,7 +66,7 @@ static void guest_msr(struct msr_data *msr) if (msr->write) __GUEST_ASSERT(!vector, - "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'", + "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'", msr->idx, msr->write_val, msr_val); /* Invariant TSC bit appears when TSC invariant control MSR is written to */ diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c index 4d6a37a5d896..65ad38b6be1f 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c @@ -35,7 +35,7 @@ do { \ \ for (i = 0; i < size; i++) \ __GUEST_ASSERT(mem[i] == pattern, \ - "Guest expected 0x%x at offset %lu (gpa 0x%llx), got 0x%x", \ + "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \ pattern, i, gpa + i, mem[i]); \ } while (0) diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 7ee44496cf97..0c7ce3d4e83a 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, - "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, - "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index ebbcb0a3f743..2a8d4ac2f020 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -56,7 +56,7 @@ static void guest_test_perf_capabilities_gp(uint64_t val) uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%llx', got vector '0x%x'", + "Expected #GP for value '0x%lx', got vector '0x%x'", val, vector); } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 77d04a7bdadd..dc6217440db3 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -25,7 +25,7 @@ do { \ \ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ __supported == ((xfeatures) | (dependencies)), \ - "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \ __supported, (xfeatures), (dependencies)); \ } while (0) @@ -42,7 +42,7 @@ do { \ uint64_t __supported = (supported_xcr0) & (xfeatures); \ \ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ - "supported = 0x%llx, xfeatures = 0x%llx", \ + "supported = 0x%lx, xfeatures = 0x%llx", \ __supported, (xfeatures)); \ } while (0) @@ -81,7 +81,7 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%llx), got vector '0x%x'", + "Expected success on XSETBV(0x%lx), got vector '0x%x'", supported_xcr0, vector); for (i = 0; i < 64; i++) { @@ -90,7 +90,7 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'", + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", BIT_ULL(i), supported_xcr0, vector); } -- cgit v1.2.3 From f813e6d41baf2bbdd1624f9f01ac4f365eb78891 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 14:49:15 -0800 Subject: KVM: selftests: Fix broken assert messages in Hyper-V features test Swap the ordering of parameters to guest asserts related to {RD,WR}MSR success/failure in the Hyper-V features test. As is, the output will be mangled and broken due to passing an integer as a string and vice versa. Opportunistically fix a benign %u vs. %lu issue as well. Link: https://lore.kernel.org/r/20231129224916.532431-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/hyperv_features.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 4bb63b6ee4a0..29f6bdbce817 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -55,11 +55,11 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->idx, msr->write ? "WR" : "RD", vector); + msr->write ? "WR" : "RD", msr->idx, vector); else __GUEST_ASSERT(!vector, "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->idx, msr->write ? "WR" : "RD", vector); + msr->write ? "WR" : "RD", msr->idx, vector); if (vector || is_write_only_msr(msr->idx)) goto done; @@ -102,11 +102,11 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%u', got vector '0x%x'", + "Expected #UD for control '%lu', got vector '0x%x'", hcall->control, vector); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%u', got vector '0x%x'", + "Expected no exception for control '%lu', got vector '0x%x'", hcall->control, vector); GUEST_ASSERT_EQ(res, hcall->expect); } -- cgit v1.2.3 From 1b2658e4c709135fe1910423d3216632f641baf9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Nov 2023 14:49:16 -0800 Subject: KVM: selftests: Annotate guest ucall, printf, and assert helpers with __printf() Annotate guest printf helpers with __printf() so that the compiler will warn about incorrect formatting at compile time (see git log for how easy it is to screw up with the formatting). Suggested-by: Maxim Levitsky Link: https://lore.kernel.org/r/20231129224916.532431-5-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/test_util.h | 2 +- tools/testing/selftests/kvm/include/ucall_common.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index a0c7dd3a5b30..71a41fa924b7 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -191,7 +191,7 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str) } int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args); -int guest_snprintf(char *buf, int n, const char *fmt, ...); +__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...); char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 0fb472a5a058..d9d6581b8d4f 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -34,9 +34,10 @@ void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); -void ucall_fmt(uint64_t cmd, const char *fmt, ...); -void ucall_assert(uint64_t cmd, const char *exp, const char *file, - unsigned int line, const char *fmt, ...); +__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...); +__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, + const char *file, unsigned int line, + const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); int ucall_nr_pages_required(uint64_t page_size); -- cgit v1.2.3 From 6b0ae4566aba566a2ab4a2de9c59ab3d7f4b43c2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:15 -0800 Subject: selftests/bpf: Sort config in alphabetic order Move CONFIG_VSOCKETS up, so the CONFIGs are in alphabetic order. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-5-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3ec5927ec3e5..782876452acf 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -82,7 +82,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y +CONFIG_VSOCKETS=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y -CONFIG_VSOCKETS=y -- cgit v1.2.3 From 341f06fdddf72cd60a10945152f69f0f1d614519 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:16 -0800 Subject: selftests/bpf: Add tests for filesystem kfuncs Add selftests for two new filesystem kfuncs: 1. bpf_get_file_xattr 2. bpf_get_fsverity_digest These tests simply make sure the two kfuncs work. Another selftest will be added to demonstrate how to use these kfuncs to verify file signature. CONFIG_FS_VERITY is added to selftests config. However, this is not sufficient to guarantee bpf_get_fsverity_digest works. This is because fsverity need to be enabled at file system level (for example, with tune2fs on ext4). If local file system doesn't have this feature enabled, just skip the test. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-6-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 3 + tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c | 134 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_fsverity.c | 48 ++++++++ tools/testing/selftests/bpf/progs/test_get_xattr.c | 37 ++++++ 5 files changed, 223 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c create mode 100644 tools/testing/selftests/bpf/progs/test_fsverity.c create mode 100644 tools/testing/selftests/bpf/progs/test_get_xattr.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 5ca68ff0b59f..c2c084a44eae 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -55,4 +55,7 @@ void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern int bpf_get_file_xattr(struct file *file, const char *name, + struct bpf_dynptr *value_ptr) __ksym; +extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 782876452acf..c125c441abc7 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y +CONFIG_FS_VERITY=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c new file mode 100644 index 000000000000..d3196a4b089f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include +#include +#include "test_get_xattr.skel.h" +#include "test_fsverity.skel.h" + +static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; + +static void test_xattr(void) +{ + struct test_get_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel = test_get_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_get_xattr__attach(skel); + + if (!ASSERT_OK(err, "test_get_xattr__attach")) + goto out; + + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) + goto out; + + ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr"); + +out: + close(fd); + test_get_xattr__destroy(skel); + remove(testfile); +} + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +static void test_fsverity(void) +{ + struct fsverity_enable_arg arg = {0}; + struct test_fsverity *skel = NULL; + struct fsverity_digest *d; + int fd, err; + char buffer[4096]; + + fd = open(testfile, O_CREAT | O_RDWR, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + /* Write random buffer, so the file is not empty */ + err = write(fd, buffer, 4096); + if (!ASSERT_EQ(err, 4096, "write_file")) + goto out; + close(fd); + + /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */ + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file1")) + return; + + /* Enable fsverity for the file. + * If the file system doesn't support verity, this will fail. Skip + * the test in such case. + */ + arg.version = 1; + arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256; + arg.block_size = 4096; + err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg); + if (err) { + printf("%s:SKIP:local fs doesn't support fsverity (%d)\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__, errno); + test__skip(); + goto out; + } + + skel = test_fsverity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load")) + goto out; + + /* Get fsverity_digest from ioctl */ + d = (struct fsverity_digest *)skel->bss->expected_digest; + d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256; + d->digest_size = SHA256_DIGEST_SIZE; + err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest); + if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_fsverity__attach(skel); + if (!ASSERT_OK(err, "test_fsverity__attach")) + goto out; + + /* Reopen the file to trigger the program */ + close(fd); + fd = open(testfile, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_file2")) + goto out; + + ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity"); + ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches"); +out: + close(fd); + test_fsverity__destroy(skel); + remove(testfile); +} + +void test_fs_kfuncs(void) +{ + if (test__start_subtest("xattr")) + test_xattr(); + + if (test__start_subtest("fsverity")) + test_fsverity(); +} diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c new file mode 100644 index 000000000000..3975495b75c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ + +char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +__u32 monitored_pid; +__u32 got_fsverity; +__u32 digest_matches; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr; + __u32 pid; + int ret; + int i; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + ret = bpf_get_fsverity_digest(f, &digest_ptr); + if (ret < 0) + return 0; + got_fsverity = 1; + + for (i = 0; i < sizeof(digest); i++) { + if (digest[i] != expected_digest[i]) + return 0; + } + + digest_matches = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c new file mode 100644 index 000000000000..7eb2a4e5a3e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; +__u32 found_xattr; + +static const char expected_value[] = "hello"; +char value[32]; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + + ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); + if (ret != sizeof(expected_value)) + return 0; + if (bpf_strncmp(value, ret, expected_value)) + return 0; + found_xattr = 1; + return 0; +} -- cgit v1.2.3 From 1030e9154258b54e3c7dc07c39e7b6dcf24bc3d2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:17 -0800 Subject: selftests/bpf: Add test that uses fsverity and xattr to sign a file This selftests shows a proof of concept method to use BPF LSM to enforce file signature. This test is added to verify_pkcs7_sig, so that some existing logic can be reused. This file signature method uses fsverity, which provides reliable and efficient hash (known as digest) of the file. The file digest is signed with asymmetic key, and the signature is stored in xattr. At the run time, BPF LSM reads file digest and the signature, and then checks them against the public key. Note that this solution does NOT require FS_VERITY_BUILTIN_SIGNATURES. fsverity is only used to provide file digest. The signature verification and access control is all implemented in BPF LSM. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-7-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 7 + .../selftests/bpf/prog_tests/verify_pkcs7_sig.c | 165 ++++++++++++++++++++- .../selftests/bpf/progs/test_sig_in_xattr.c | 83 +++++++++++ .../selftests/bpf/progs/test_verify_pkcs7_sig.c | 8 +- tools/testing/selftests/bpf/verify_sig_setup.sh | 25 ++++ 5 files changed, 280 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_sig_in_xattr.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index c2c084a44eae..b4e78c1eb37b 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -58,4 +58,11 @@ void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; #endif diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index dd7f2bc70048..6c90372b772d 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -16,9 +16,12 @@ #include #include #include +#include +#include #include #include "test_verify_pkcs7_sig.skel.h" +#include "test_sig_in_xattr.skel.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 @@ -26,6 +29,10 @@ #define VERIFY_USE_SECONDARY_KEYRING (1UL) #define VERIFY_USE_PLATFORM_KEYRING (2UL) +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ #define MODULE_SIG_STRING "~Module signature appended~\n" @@ -254,7 +261,7 @@ out: return ret; } -void test_verify_pkcs7_sig(void) +static void test_verify_pkcs7_sig_from_map(void) { libbpf_print_fn_t old_print_cb; char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; @@ -400,3 +407,159 @@ close_prog: skel->bss->monitored_pid = 0; test_verify_pkcs7_sig__destroy(skel); } + +static int get_signature_size(const char *sig_path) +{ + struct stat st; + + if (stat(sig_path, &st) == -1) + return -1; + + return st.st_size; +} + +static int add_signature_to_xattr(const char *data_path, const char *sig_path) +{ + char sig[MAX_SIG_SIZE] = {0}; + int fd, size, ret; + + if (sig_path) { + fd = open(sig_path, O_RDONLY); + if (fd < 0) + return -1; + + size = read(fd, sig, MAX_SIG_SIZE); + close(fd); + if (size <= 0) + return -1; + } else { + /* no sig_path, just write 32 bytes of zeros */ + size = 32; + } + ret = setxattr(data_path, "user.sig", sig, size, 0); + if (!ASSERT_OK(ret, "setxattr")) + return -1; + + return 0; +} + +static int test_open_file(struct test_sig_in_xattr *skel, char *data_path, + pid_t pid, bool should_success, char *name) +{ + int ret; + + skel->bss->monitored_pid = pid; + ret = open(data_path, O_RDONLY); + close(ret); + skel->bss->monitored_pid = 0; + + if (should_success) { + if (!ASSERT_GE(ret, 0, name)) + return -1; + } else { + if (!ASSERT_LT(ret, 0, name)) + return -1; + } + return 0; +} + +static void test_pkcs7_sig_fsverity(void) +{ + char data_path[PATH_MAX]; + char sig_path[PATH_MAX]; + char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; + char *tmp_dir; + struct test_sig_in_xattr *skel = NULL; + pid_t pid; + int ret; + + tmp_dir = mkdtemp(tmp_dir_template); + if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp")) + return; + + snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir); + snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir); + + ret = _run_setup_process(tmp_dir, "setup"); + if (!ASSERT_OK(ret, "_run_setup_process")) + goto out; + + ret = _run_setup_process(tmp_dir, "fsverity-create-sign"); + + if (ret) { + printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__); + test__skip(); + goto out; + } + + skel = test_sig_in_xattr__open(); + if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) + goto out; + ret = get_signature_size(sig_path); + if (!ASSERT_GT(ret, 0, "get_signaure_size")) + goto out; + skel->bss->sig_size = ret; + skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", + "ebpf_testing_keyring", NULL, + KEY_SPEC_SESSION_KEYRING); + memcpy(skel->bss->digest, "FSVerity", 8); + + ret = test_sig_in_xattr__load(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__load")) + goto out; + + ret = test_sig_in_xattr__attach(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__attach")) + goto out; + + pid = getpid(); + + /* Case 1: fsverity is not enabled, open should succeed */ + if (test_open_file(skel, data_path, pid, true, "open_1")) + goto out; + + /* Case 2: fsverity is enabled, xattr is missing, open should + * fail + */ + ret = _run_setup_process(tmp_dir, "fsverity-enable"); + if (!ASSERT_OK(ret, "fsverity-enable")) + goto out; + if (test_open_file(skel, data_path, pid, false, "open_2")) + goto out; + + /* Case 3: fsverity is enabled, xattr has valid signature, open + * should succeed + */ + ret = add_signature_to_xattr(data_path, sig_path); + if (!ASSERT_OK(ret, "add_signature_to_xattr_1")) + goto out; + + if (test_open_file(skel, data_path, pid, true, "open_3")) + goto out; + + /* Case 4: fsverity is enabled, xattr has invalid signature, open + * should fail + */ + ret = add_signature_to_xattr(data_path, NULL); + if (!ASSERT_OK(ret, "add_signature_to_xattr_2")) + goto out; + test_open_file(skel, data_path, pid, false, "open_4"); + +out: + _run_setup_process(tmp_dir, "cleanup"); + if (!skel) + return; + + skel->bss->monitored_pid = 0; + test_sig_in_xattr__destroy(skel); +} + +void test_verify_pkcs7_sig(void) +{ + if (test__start_subtest("pkcs7_sig_from_map")) + test_verify_pkcs7_sig_from_map(); + if (test__start_subtest("pkcs7_sig_fsverity")) + test_pkcs7_sig_fsverity(); +} diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c new file mode 100644 index 000000000000..2f0eb1334d65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define MAX_SIG_SIZE 1024 + +/* By default, "fsverity sign" signs a file with fsverity_formatted_digest + * of the file. fsverity_formatted_digest on the kernel side is only used + * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't not + * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have + * fsverity_formatted_digest. In this test, we intentionally avoid using + * fsverity_formatted_digest. + * + * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by + * fsverity_digest. We use a char array of size fsverity_formatted_digest + * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space, + * and the rest of it is filled by bpf_get_fsverity_digest. + * + * Note that, generating signatures based on fsverity_formatted_digest is + * the design choice of this selftest (and "fsverity sign"). With BPF + * LSM, we have the flexibility to generate signature based on other data + * sets, for example, fsverity_digest or only the digest[] part of it. + */ +#define MAGIC_SIZE 8 +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ +char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; + +__u32 monitored_pid; +char sig[MAX_SIG_SIZE]; +__u32 sig_size; +__u32 user_keyring_serial; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr, sig_ptr; + struct bpf_key *trusted_keyring; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* digest_ptr points to fsverity_digest */ + bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr); + + ret = bpf_get_fsverity_digest(f, &digest_ptr); + /* No verity, allow access */ + if (ret < 0) + return 0; + + /* Move digest_ptr to fsverity_formatted_digest */ + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + + /* Read signature from xattr */ + bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr); + ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr); + /* No signature, reject access */ + if (ret < 0) + return -EPERM; + + trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0); + if (!trusted_keyring) + return -ENOENT; + + /* Verify signature */ + ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring); + + bpf_key_put(trusted_keyring); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index 7748cc23de8a..f42e9f3831a1 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -10,17 +10,11 @@ #include #include #include +#include "bpf_kfuncs.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; -extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, - struct bpf_key *trusted_keyring) __ksym; - __u32 monitored_pid; __u32 user_keyring_serial; __u64 system_keyring_id; diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index ba08922b4a27..f2cac42298ba 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -60,6 +60,27 @@ cleanup() { rm -rf ${tmp_dir} } +fsverity_create_sign_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + sig_file=${tmp_dir}/sig-file + dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null + fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file + + # We do not want to enable fsverity on $data_file yet. Try whether + # the file system support fsverity on a different file. + touch ${tmp_dir}/tmp-file + fsverity enable ${tmp_dir}/tmp-file +} + +fsverity_enable_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + fsverity enable $data_file +} + catch() { local exit_code="$1" @@ -86,6 +107,10 @@ main() setup "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" + elif [[ "${action}" == "fsverity-create-sign" ]]; then + fsverity_create_sign_file "${tmp_dir}" + elif [[ "${action}" == "fsverity-enable" ]]; then + fsverity_enable_file "${tmp_dir}" else echo "Unknown action: ${action}" exit 1 -- cgit v1.2.3 From 5fad52bee30414270104525e3a0266327a6e9d11 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:56:56 -0800 Subject: bpf: provide correct register name for exception callback retval check bpf_throw() is checking R1, so let's report R1 in the log. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 ++++++------ tools/testing/selftests/bpf/progs/exceptions_assert.c | 2 +- tools/testing/selftests/bpf/progs/exceptions_fail.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8e7b6072e3f4..25b9d470957e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11805,7 +11805,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, return 0; } -static int check_return_code(struct bpf_verifier_env *env, int regno); +static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name); static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) @@ -11942,7 +11942,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * to bpf_throw becomes the return value of the program. */ if (!env->exception_callback_subprog) { - err = check_return_code(env, BPF_REG_1); + err = check_return_code(env, BPF_REG_1, "R1"); if (err < 0) return err; } @@ -14972,7 +14972,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static int check_return_code(struct bpf_verifier_env *env, int regno) +static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name) { struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; @@ -15026,7 +15026,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) } if (!tnum_in(const_0, reg->var_off)) { - verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0"); + verbose_invalid_scalar(env, reg, &const_0, "async callback", reg_name); return -EINVAL; } return 0; @@ -15126,7 +15126,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) } if (!tnum_in(range, reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); + verbose_invalid_scalar(env, reg, &range, "program exit", reg_name); if (prog->expected_attach_type == BPF_LSM_CGROUP && prog_type == BPF_PROG_TYPE_LSM && !prog->aux->attach_func_proto->type) @@ -17410,7 +17410,7 @@ process_bpf_exit_full: continue; } - err = check_return_code(env, BPF_REG_0); + err = check_return_code(env, BPF_REG_0, "R0"); if (err) return err; process_bpf_exit: diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 49efaed143fc..575e7dd719c4 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has value (0x40; 0x0)") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8c0ef2742208..81ead7512ba2 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0) should") +__failure __msg("At program exit the register R1 has value (0x40; 0x0) should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); -- cgit v1.2.3 From 60a6b2c78c62d0a99ccb7ad5edc950f79e56306a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:56:59 -0800 Subject: selftests/bpf: add selftest validating callback result is enforced BPF verifier expects callback subprogs to return values from specified range (typically [0, 1]). This requires that r0 at exit is both precise (because we rely on specific value range) and is marked as read (otherwise state comparison will ignore such register as unimportant). Add a simple test that validates that all these conditions are enforced. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_subprog_precision.c | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index b5efcaeaa1ae..d41d2a8bb97e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -117,6 +117,56 @@ __naked int global_subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long loop_callback_bad() +{ + /* bpf_loop() callback that can return values outside of [0, 1] range */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* bpf_loop() expects [0, 1] values, so branch above skipping + * r0 = 0; should lead to a failure, but if exit instruction + * doesn't enforce r0's precision, this callback will be + * successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} + +SEC("?raw_tp") +__failure __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") +/* check that we have branch code path doing its own validation */ +__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") +/* check that branch code path marks r0 as precise, before failing */ +__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") +__msg("At callback return the register R0 has value (0x0; 0x7fffffffffffffff) should have been in (0x0; 0x1)") +__naked int callback_precise_return_fail(void) +{ + asm volatile ( + "r1 = 1;" /* nr_loops */ + "r2 = %[loop_callback_bad];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r0 = 0;" + "exit;" + : + : __imm_ptr(loop_callback_bad), + __imm(bpf_loop) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) /* First simulated path does not include callback body, -- cgit v1.2.3 From c871d0e00f0e8c207ce8ff89025e35cc49a8a3c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:00 -0800 Subject: bpf: enforce precise retval range on program exit Similarly to subprog/callback logic, enforce return value of BPF program using more precise smin/smax range. We need to adjust a bunch of tests due to a changed format of an error message. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 56 +++++++++++----------- .../selftests/bpf/progs/exceptions_assert.c | 2 +- .../testing/selftests/bpf/progs/exceptions_fail.c | 2 +- .../selftests/bpf/progs/test_global_func15.c | 2 +- tools/testing/selftests/bpf/progs/timer_failure.c | 2 +- .../selftests/bpf/progs/user_ringbuf_fail.c | 2 +- .../bpf/progs/verifier_cgroup_inv_retcode.c | 8 ++-- .../bpf/progs/verifier_netfilter_retcode.c | 2 +- .../bpf/progs/verifier_subprog_precision.c | 2 +- 9 files changed, 40 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3d9d7de68da..9411c1046268 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -362,20 +362,23 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) static void verbose_invalid_scalar(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - struct tnum *range, const char *ctx, + struct bpf_retval_range range, const char *ctx, const char *reg_name) { - char tn_buf[48]; + bool unknown = true; - verbose(env, "At %s the register %s ", ctx, reg_name); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); + verbose(env, "At %s the register %s has", ctx, reg_name); + if (reg->smin_value > S64_MIN) { + verbose(env, " smin=%lld", reg->smin_value); + unknown = false; } - tnum_strn(tn_buf, sizeof(tn_buf), *range); - verbose(env, " should have been in %s\n", tn_buf); + if (reg->smax_value < S64_MAX) { + verbose(env, " smax=%lld", reg->smax_value); + unknown = false; + } + if (unknown) + verbose(env, " unknown scalar value"); + verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); } static bool type_may_be_null(u32 type) @@ -9606,10 +9609,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) /* enforce R0 return value range */ if (!retval_range_within(callee->callback_ret_range, r0)) { - struct tnum range = tnum_range(callee->callback_ret_range.minval, - callee->callback_ret_range.maxval); - - verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + verbose_invalid_scalar(env, r0, callee->callback_ret_range, + "callback return", "R0"); return -EINVAL; } if (!calls_callback(env, callee->callsite)) { @@ -14995,7 +14996,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; struct bpf_reg_state *reg; - struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0); + struct bpf_retval_range range = retval_range(0, 1); + struct bpf_retval_range const_0 = retval_range(0, 0); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; struct bpf_func_state *frame = env->cur_state->frame[0]; @@ -15043,8 +15045,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char return -EINVAL; } - if (!tnum_in(const_0, reg->var_off)) { - verbose_invalid_scalar(env, reg, &const_0, "async callback", reg_name); + if (!retval_range_within(const_0, reg)) { + verbose_invalid_scalar(env, reg, const_0, "async callback", reg_name); return -EINVAL; } return 0; @@ -15070,14 +15072,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) - range = tnum_range(1, 1); + range = retval_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) - range = tnum_range(0, 3); + range = retval_range(0, 3); break; case BPF_PROG_TYPE_CGROUP_SKB: if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { - range = tnum_range(0, 3); + range = retval_range(0, 3); enforce_attach_type_range = tnum_range(2, 3); } break; @@ -15090,13 +15092,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char case BPF_PROG_TYPE_RAW_TRACEPOINT: if (!env->prog->aux->attach_btf_id) return 0; - range = tnum_const(0); + range = retval_range(0, 0); break; case BPF_PROG_TYPE_TRACING: switch (env->prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - range = tnum_const(0); + range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: case BPF_MODIFY_RETURN: @@ -15108,7 +15110,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char } break; case BPF_PROG_TYPE_SK_LOOKUP: - range = tnum_range(SK_DROP, SK_PASS); + range = retval_range(SK_DROP, SK_PASS); break; case BPF_PROG_TYPE_LSM: @@ -15122,12 +15124,12 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char /* Make sure programs that attach to void * hooks don't try to modify return value. */ - range = tnum_range(1, 1); + range = retval_range(1, 1); } break; case BPF_PROG_TYPE_NETFILTER: - range = tnum_range(NF_DROP, NF_ACCEPT); + range = retval_range(NF_DROP, NF_ACCEPT); break; case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value @@ -15143,8 +15145,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char return -EINVAL; } - if (!tnum_in(range, reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "program exit", reg_name); + if (!retval_range_within(range, reg)) { + verbose_invalid_scalar(env, reg, range, "program exit", reg_name); if (prog->expected_attach_type == BPF_LSM_CGROUP && prog_type == BPF_PROG_TYPE_LSM && !prog->aux->attach_func_proto->type) diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 575e7dd719c4..0ef81040da59 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R1 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has smin=64 smax=64") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 81ead7512ba2..9cceb6521143 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R1 has value (0x40; 0x0) should") +__failure __msg("At program exit the register R1 has smin=64 smax=64 should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index b512d6a6c75e..f80207480e8a 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -13,7 +13,7 @@ __noinline int foo(unsigned int *v) } SEC("cgroup_skb/ingress") -__failure __msg("At program exit the register R0 has value") +__failure __msg("At program exit the register R0 has ") int global_func15(struct __sk_buff *skb) { unsigned int v = 1; diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 226d33b5a05c..9000da1e2120 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -30,7 +30,7 @@ static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) } SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in (0x0; 0x0)") +__failure __msg("should have been in [0, 0]") int BPF_PROG2(test_ret_1, int, a) { int key = 0; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 03ee946c6bf7..11ab25c42c36 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -184,7 +184,7 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) * not be able to write to that pointer. */ SEC("?raw_tp") -__failure __msg("At callback return the register R0 has value") +__failure __msg("At callback return the register R0 has ") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c index d6c4a7f3f790..6e0f349f8f15 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c @@ -7,7 +7,7 @@ SEC("cgroup/sock") __description("bpf_exit with invalid return code. test1") -__failure __msg("R0 has value (0x0; 0xffffffff)") +__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]") __naked void with_invalid_return_code_test1(void) { asm volatile (" \ @@ -30,7 +30,7 @@ __naked void with_invalid_return_code_test2(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test3") -__failure __msg("R0 has value (0x0; 0x3)") +__failure __msg("smin=0 smax=3 should have been in [0, 1]") __naked void with_invalid_return_code_test3(void) { asm volatile (" \ @@ -53,7 +53,7 @@ __naked void with_invalid_return_code_test4(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test5") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test5(void) { asm volatile (" \ @@ -75,7 +75,7 @@ __naked void with_invalid_return_code_test6(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test7") -__failure __msg("R0 has unknown scalar value") +__failure __msg("R0 has unknown scalar value should have been in [0, 1]") __naked void with_invalid_return_code_test7(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c index 353ae6da00e1..e1ffa5d32ff0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c @@ -39,7 +39,7 @@ __naked void with_valid_return_code_test3(void) SEC("netfilter") __description("bpf_exit with invalid return code. test4") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test4(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index d41d2a8bb97e..0dfe3f8b69ac 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -148,7 +148,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") __msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") /* check that branch code path marks r0 as precise, before failing */ __msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") -__msg("At callback return the register R0 has value (0x0; 0x7fffffffffffffff) should have been in (0x0; 0x1)") +__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]") __naked int callback_precise_return_fail(void) { asm volatile ( -- cgit v1.2.3 From e02dea158ddaebe6e725be715e0009923b96ec8e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:03 -0800 Subject: selftests/bpf: validate async callback return value check correctness Adjust timer/timer_ret_1 test to validate more carefully verifier logic of enforcing async callback return value. This test will pass only if return result is marked precise and read. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/timer_failure.c | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 9000da1e2120..9fbc69c77bbb 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -21,17 +21,37 @@ struct { __type(value, struct elem); } timer_map SEC(".maps"); -static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +__naked __noinline __used +static unsigned long timer_cb_ret_bad() { - if (bpf_get_smp_processor_id() % 2) - return 1; - else - return 0; + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* async callback is expected to return 0, so branch above + * skipping r0 = 0; should lead to a failure, but if exit + * instruction doesn't enforce r0's precision, this callback + * will be successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); } SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in [0, 0]") -int BPF_PROG2(test_ret_1, int, a) +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 22: (b7) r0 = 0") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 24: (85) call bpf_get_prandom_u32#7") +__msg("should have been in [0, 0]") +long BPF_PROG2(test_bad_ret, int, a) { int key = 0; struct bpf_timer *timer; @@ -39,7 +59,7 @@ int BPF_PROG2(test_ret_1, int, a) timer = bpf_map_lookup_elem(&timer_map, &key); if (timer) { bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); - bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_set_callback(timer, timer_cb_ret_bad); bpf_timer_start(timer, 1000, 0); } -- cgit v1.2.3 From 5c19e1d05e9e71b42d8e779f41959254239709da Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:04 -0800 Subject: selftests/bpf: adjust global_func15 test to validate prog exit precision Add one more subtest to global_func15 selftest to validate that verifier properly marks r0 as precise and avoids erroneous state pruning of the branch that has return value outside of expected [0, 1] value. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/test_global_func15.c | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index f80207480e8a..b4e089d6981d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -22,3 +22,35 @@ int global_func15(struct __sk_buff *skb) return v; } + +SEC("cgroup_skb/ingress") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7") +__msg("At program exit the register R0 has ") +__naked int global_func15_tricky_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 1;" + "1:" + "goto +0;" /* checkpoint */ + /* cgroup_skb/ingress program is expected to return [0, 1] + * values, so branch above makes sure that in a fallthrough + * case we have a valid 1 stored in R0 register, but in + * a branch case we assign some random value to R0. So if + * there is something wrong with precision tracking for R0 at + * program exit, we might erronenously prune branch case, + * because R0 in fallthrough case is imprecise (and thus any + * value is valid from POV of verifier is_state_equal() logic) + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} -- cgit v1.2.3 From 81eff2e36481c5cf4a2ac906ae56c3fbd3e6f305 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:05 -0800 Subject: bpf: simplify tnum output if a fully known constant Emit tnum representation as just a constant if all bits are known. Use decimal-vs-hex logic to determine exact format of emitted constant value, just like it's done for register range values. For that move tnum_strn() to kernel/bpf/log.c to reuse decimal-vs-hex determination logic and constants. Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 13 +++++++++++++ kernel/bpf/tnum.c | 6 ------ .../selftests/bpf/progs/verifier_direct_packet_access.c | 2 +- tools/testing/selftests/bpf/progs/verifier_int_ptr.c | 2 +- tools/testing/selftests/bpf/progs/verifier_stack_ptr.c | 4 ++-- 5 files changed, 17 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 3505f3e5ae96..55d019f30e91 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -539,6 +539,19 @@ static void verbose_snum(struct bpf_verifier_env *env, s64 num) verbose(env, "%#llx", num); } +int tnum_strn(char *str, size_t size, struct tnum a) +{ + /* print as a constant, if tnum is fully known */ + if (a.mask == 0) { + if (is_unum_decimal(a.value)) + return snprintf(str, size, "%llu", a.value); + else + return snprintf(str, size, "%#llx", a.value); + } + return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); +} +EXPORT_SYMBOL_GPL(tnum_strn); + static void print_scalar_ranges(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, const char **sep) diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index f4c91c9b27d7..9dbc31b25e3d 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -172,12 +172,6 @@ bool tnum_in(struct tnum a, struct tnum b) return a.value == b.value; } -int tnum_strn(char *str, size_t size, struct tnum a) -{ - return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); -} -EXPORT_SYMBOL_GPL(tnum_strn); - int tnum_sbin(char *str, size_t size, struct tnum a) { size_t n; diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 99a23dea8233..be95570ab382 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -411,7 +411,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test17 (pruning, alignment)") -__failure __msg("misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4") +__failure __msg("misaligned packet access off 2+0+15+-4 size 4") __flag(BPF_F_STRICT_ALIGNMENT) __naked void packet_access_test17_pruning_alignment(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index b054f9c48143..74d9cad469d9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -67,7 +67,7 @@ __naked void ptr_to_long_half_uninitialized(void) SEC("cgroup/sysctl") __description("ARG_PTR_TO_LONG misaligned") -__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8") +__failure __msg("misaligned stack access off 0+-20+0 size 8") __naked void arg_ptr_to_long_misaligned(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c index e0f77e3e7869..417c61cd4b19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on off") -__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8") +__failure __msg("misaligned stack access off 0+-8+2 size 8") __failure_unpriv __naked void load_bad_alignment_on_off(void) { @@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on reg") -__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8") +__failure __msg("misaligned stack access off 0+-10+8 size 8") __failure_unpriv __naked void load_bad_alignment_on_reg(void) { -- cgit v1.2.3 From 072b6ad7cac6a868e56dec5f48a2e67d9ab8cb6e Mon Sep 17 00:00:00 2001 From: Nick Forrington Date: Thu, 2 Nov 2023 16:11:16 +0000 Subject: perf docs: Fix man page formatting for 'perf lock' This makes "CONTENTION" a top level section (rather than a subsection of "INFO"). Fixes: 79079f21f50a501f ("perf lock: Add -k and -F options to 'contention' subcommand") Reviewed-by: James Clark Signed-off-by: Nick Forrington Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231102161117.49533-1-nick.forrington@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 503abcba1438..f5938d616d75 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -119,7 +119,7 @@ INFO OPTIONS CONTENTION OPTIONS --------------- +------------------ -k:: --key=:: -- cgit v1.2.3 From 556bed5c6d4167f9ffb5c8648cdd3c8e39aefec7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Nov 2023 18:46:36 -0300 Subject: perf beauty: Don't use 'find ... -printf' as it isn't available in busybox MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Namhyung reported: I'm seeing a build error on my Alpine linux image which uses busybox + musl libc: In file included from trace/beauty/arch_errno_names.c:1, from builtin-trace.c:899: /build/trace/beauty/generated/arch_errno_name_array.c: In function 'arch_syscalls__strerrno': /build/trace/beauty/generated/arch_errno_name_array.c:142:49: error: unused parameter 'arch' [-Werror=unused-parameter] 142 | const char *arch_syscalls__strerrno(const char *arch, int err) It looks like busybox find command doesn't have -printf option find: unrecognized: -printf , Yesterday 9:16 PM , BusyBox v1.36.1 (2023-07-27 17:12:24 UTC) multi-call binary. Usage: find [-HL] [PATH]... [OPTIONS] [ACTIONS] Search for files and perform actions on them. First failed action stops processing of current file. Defaults: PATH is current directory, action is '-print' So just remove it and pipe find's entry to a basename loop to produce the same result. Then use an alternative loop that relies on the shell to avoid needless forks and execs. The discussion about it generated the impetus to stop doing strcmps to find the right table at each errno to string translation but instead do this just once and then use a function pointer to the right arch specific table. Suggested-by: David Laight Reported-by: Namhyung Kim Cc: Adrian Hunter Cc: Hendrik Brueckner Cc: Ian Rogers Cc: Jiri Olsa Cc: Michael Petlan Cc: Thomas Richter Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/arch_errno_names.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index cc09dcaa891e..b6e0767b4b34 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -76,7 +76,9 @@ EoHEADER # Create list of architectures that have a specific errno.h. archlist="" -for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | sort -r); do +for f in $toolsdir/arch/*/include/uapi/asm/errno.h; do + d=${f%/include/uapi/asm/errno.h} + arch="${d##*/}" test -f $toolsdir/arch/$arch/include/uapi/asm/errno.h && archlist="$archlist $arch" done -- cgit v1.2.3 From b89710bd215e650f0aaf8ffe7104413d46d44392 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 27 Nov 2023 18:34:28 +0100 Subject: iio: add modifiers for A and B ultraviolet light Currently there are only two modifiers for ultraviolet light: a generic one for any ultraviolet light (IIO_MOD_LIGHT_UV) and one for deep ultraviolet (IIO_MOD_LIGHT_DUV), which is also referred as ultraviolet C (UV-C) band and covers short-wave ultraviolet. There are still no modifiers for the long-wave and medium-wave ultraviolet bands. These two bands are the main components used to obtain the UV index on the Earth's surface. Add modifiers for the ultraviolet A (UV-A) and ultraviolet B (UV-B) bands. Signed-off-by: Javier Carrasco Link: https://lore.kernel.org/r/20231110-veml6075-v3-1-6ee46775b422@gmail.com Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 7 +++++-- drivers/iio/industrialio-core.c | 2 ++ include/uapi/linux/iio/types.h | 2 ++ tools/iio/iio_event_monitor.c | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 0eadc08c3a13..0d3ec5fc45f2 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1574,6 +1574,8 @@ What: /sys/.../iio:deviceX/in_intensityY_raw What: /sys/.../iio:deviceX/in_intensityY_ir_raw What: /sys/.../iio:deviceX/in_intensityY_both_raw What: /sys/.../iio:deviceX/in_intensityY_uv_raw +What: /sys/.../iio:deviceX/in_intensityY_uva_raw +What: /sys/.../iio:deviceX/in_intensityY_uvb_raw What: /sys/.../iio:deviceX/in_intensityY_duv_raw KernelVersion: 3.4 Contact: linux-iio@vger.kernel.org @@ -1582,8 +1584,9 @@ Description: that measurements contain visible and infrared light components or just infrared light, respectively. Modifier uv indicates that measurements contain ultraviolet light - components. Modifier duv indicates that measurements - contain deep ultraviolet light components. + components. Modifiers uva, uvb and duv indicate that + measurements contain A, B or deep (C) ultraviolet light + components respectively. What: /sys/.../iio:deviceX/in_uvindex_input KernelVersion: 4.6 diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 34e1f8d0071c..f6a123d397db 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -117,6 +117,8 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_LIGHT_GREEN] = "green", [IIO_MOD_LIGHT_BLUE] = "blue", [IIO_MOD_LIGHT_UV] = "uv", + [IIO_MOD_LIGHT_UVA] = "uva", + [IIO_MOD_LIGHT_UVB] = "uvb", [IIO_MOD_LIGHT_DUV] = "duv", [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 9c2ffdcd6623..5060963707b1 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -91,6 +91,8 @@ enum iio_modifier { IIO_MOD_CO2, IIO_MOD_VOC, IIO_MOD_LIGHT_UV, + IIO_MOD_LIGHT_UVA, + IIO_MOD_LIGHT_UVB, IIO_MOD_LIGHT_DUV, IIO_MOD_PM1, IIO_MOD_PM2P5, diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 2eaaa7123b04..8073c9e4fe46 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -105,6 +105,8 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_LIGHT_GREEN] = "green", [IIO_MOD_LIGHT_BLUE] = "blue", [IIO_MOD_LIGHT_UV] = "uv", + [IIO_MOD_LIGHT_UVA] = "uva", + [IIO_MOD_LIGHT_UVB] = "uvb", [IIO_MOD_LIGHT_DUV] = "duv", [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", -- cgit v1.2.3 From 153de60e8bfb4501e1462a2f74cb787c137b996c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 4 Dec 2023 09:39:40 +0000 Subject: selftests/bpf: Fix spelling mistake "get_signaure_size" -> "get_signature_size" There is a spelling mistake in an ASSERT_GT message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231204093940.2611954-1-colin.i.king@gmail.com --- tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index 6c90372b772d..ab0f02faa80c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -498,7 +498,7 @@ static void test_pkcs7_sig_fsverity(void) if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) goto out; ret = get_signature_size(sig_path); - if (!ASSERT_GT(ret, 0, "get_signaure_size")) + if (!ASSERT_GT(ret, 0, "get_signature_size")) goto out; skel->bss->sig_size = ret; skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", -- cgit v1.2.3 From 54373b5d53c1f6aa6164ee5bea4761abb16b351c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 1 Dec 2023 14:52:00 -0300 Subject: perf env: Introduce perf_env__arch_strerrno() That will cache the arch specific function translating error numbers to strings. Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: David Laight Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20231201203046.486596-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++---- tools/perf/util/env.c | 12 ++++++++++++ tools/perf/util/env.h | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e541d0e2777a..109b8e64fe69 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2470,9 +2470,8 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam static const char *errno_to_name(struct evsel *evsel, int err) { struct perf_env *env = evsel__env(evsel); - const char *arch_name = perf_env__arch(env); - return arch_syscalls__strerrno(arch_name, err); + return perf_env__arch_strerrno(env, err); } static int trace__sys_exit(struct trace *trace, struct evsel *evsel, @@ -4264,12 +4263,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); if (trace->errno_summary && stats->nr_failures) { - const char *arch_name = perf_env__arch(trace->host->env); int e; for (e = 0; e < stats->max_errno; ++e) { if (stats->errnos[e] != 0) - fprintf(fp, "\t\t\t\t%s: %d\n", arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); + fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]); } } } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cbc18b22ace5..a632f33646bb 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,6 +3,7 @@ #include "debug.h" #include "env.h" #include "util/header.h" +#include "linux/compiler.h" #include #include #include "cgroup.h" @@ -12,6 +13,7 @@ #include #include "pmus.h" #include "strbuf.h" +#include "trace/beauty/beauty.h" struct perf_env perf_env; @@ -453,6 +455,16 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) +{ +#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + const char *arch_name = perf_env__arch(env); + return arch_syscalls__strerrno(arch_name, err); +#else + return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; +#endif +} + const char *perf_env__cpuid(struct perf_env *env) { int status; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 94596ff124d5..79f371879f45 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -164,6 +164,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__arch_strerrno(struct perf_env *env, int err); const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); -- cgit v1.2.3 From 4acef67646f35e14d202d5f534c0fd68d7691b22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 1 Dec 2023 17:07:34 -0300 Subject: perf env: Cache the arch specific strerrno function in perf_env__arch_strerrno() So that we don't have to go thru the series of strcmp(arch) calls for each id -> string translation. Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: David Laight Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/20231201203046.486596-3-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/arch_errno_names.sh | 6 +++--- tools/perf/trace/beauty/beauty.h | 2 -- tools/perf/util/env.c | 6 ++++-- tools/perf/util/env.h | 5 +++++ 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index b6e0767b4b34..7df4bf5b55a3 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -57,13 +57,13 @@ create_arch_errno_table_func() archlist="$1" default="$2" - printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n' + printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n' printf '{\n' for arch in $archlist; do printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch") - printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch") + printf '\t\treturn errno_to_name__%s;\n' $(arch_string "$arch") done - printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default") + printf '\treturn errno_to_name__%s;\n' $(arch_string "$default") printf '}\n' } diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 788e8f6bd90e..9feb794f5c6e 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -251,6 +251,4 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool sh void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); -const char *arch_syscalls__strerrno(const char *arch, int err); - #endif /* _PERF_TRACE_BEAUTY_H */ diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index a632f33646bb..c68b7a004f29 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -458,8 +458,10 @@ const char *perf_env__arch(struct perf_env *env) const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) { #if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) - const char *arch_name = perf_env__arch(env); - return arch_syscalls__strerrno(arch_name, err); + if (env->arch_strerrno == NULL) + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); + + return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function"; #else return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; #endif diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 79f371879f45..bf7e3c4c211f 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -53,6 +53,10 @@ struct pmu_caps { char *pmu_name; }; +typedef const char *(arch_syscalls__strerrno_t)(int err); + +arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); + struct perf_env { char *hostname; char *os_release; @@ -135,6 +139,7 @@ struct perf_env { */ bool enabled; } clock; + arch_syscalls__strerrno_t *arch_strerrno; }; enum perf_compress_type { -- cgit v1.2.3 From 28b01743ca752cea5ab182297d8b912b22f2a2d1 Mon Sep 17 00:00:00 2001 From: Veronika Molnarova Date: Fri, 1 Dec 2023 20:46:17 +0100 Subject: perf test record user-regs: Fix mask for vg register The 'vg' register for arm64 shows up in --user_regs as available when masking the variable AT_HWCAP with 1 << 22 returns '1' as done in perf_regs.c. However, in subtests for support of SVE, the check for the 'vg' register is done by masking the variable AT_HWCAP with the value 0x200000 which is equals to 1 << 21 instead of 1 << 22. This results in inconsistencies on certain systems where the test expects that the 'vg' register is not operational when it is, and vice-versa. During the testing on a machine that the test expected not to have the 'vg' register available, 'perf record' with the option --user-regs showed records for the 'vg' register together with all of the others, which means that the mask for the subtest of perf_event_attr is off by one. Change the value of the mask from 0x200000 to 0x400000 to correct it. Fixes: 9440ebdc333dd12e ("perf test arm64: Add attr tests for new VG register") Reviewed-by: Leo Yan Signed-off-by: Veronika Molnarova Cc: James Clark Cc: Michael Petlan Link: https://lore.kernel.org/r/20231201194617.13012-1-vmolnaro@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 | 2 +- tools/perf/tests/attr/test-record-user-regs-sve-aarch64 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 index fbb065842880..bed765450ca9 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 @@ -6,4 +6,4 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 129 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 index c598c803221d..a65113cd7311 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 @@ -6,7 +6,7 @@ args = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1 ret = 1 test_ret = true arch = aarch64 -auxv = auxv["AT_HWCAP"] & 0x200000 == 0x200000 +auxv = auxv["AT_HWCAP"] & 0x400000 == 0x400000 kernel_since = 6.1 [event:base-record] -- cgit v1.2.3 From e05501e8a84eee4f819f31b9ce663bddd01b3b69 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 6 Nov 2023 10:26:45 -0700 Subject: cxl: Add cxl_num_decoders_committed() usage to cxl_test Commit 458ba8189cb4 ("cxl: Add cxl_decoders_committed() helper") missed the conversion for cxl_test. Add usage of cxl_num_decoders_committed() to replace the open coding. Suggested-by: Alison Schofield Signed-off-by: Dave Jiang Reviewed-by: Fan Ni Link: https://lore.kernel.org/r/169929160525.824083.11813222229025394254.stgit@djiang5-mobl3 Signed-off-by: Dan Williams --- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/cxl_core_exports.c | 7 +++++++ tools/testing/cxl/test/cxl.c | 5 +++-- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 tools/testing/cxl/cxl_core_exports.c (limited to 'tools') diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 90f3c9802ffb..95dc58b94178 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -62,5 +62,6 @@ cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o +cxl_core-y += cxl_core_exports.o obj-m += test/ diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c new file mode 100644 index 000000000000..077e6883921d --- /dev/null +++ b/tools/testing/cxl/cxl_core_exports.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include "cxl.h" + +/* Exporting of cxl_core symbols that are only used by cxl_test */ +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index b88546299902..f4e517a0c774 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -669,10 +669,11 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; dev_dbg(&port->dev, "%s commit\n", dev_name(&cxld->dev)); - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } -- cgit v1.2.3 From 1624918be84a8bcc4f592e55635bc4fe4a96460a Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Dec 2023 22:04:24 +0800 Subject: selftests/bpf: Add test cases for inner map Add test cases to test the race between the destroy of inner map due to map-in-map update and the access of inner map in bpf program. The following 4 combinations are added: (1) array map in map array + bpf program (2) array map in map array + sleepable bpf program (3) array map in map htab + bpf program (4) array map in map htab + sleepable bpf program Before applying the fixes, when running `./test_prog -a map_in_map`, the following error was reported: ================================================================== BUG: KASAN: slab-use-after-free in array_map_update_elem+0x48/0x3e0 Read of size 4 at addr ffff888114f33824 by task test_progs/1858 CPU: 1 PID: 1858 Comm: test_progs Tainted: G O 6.6.0+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... Call Trace: dump_stack_lvl+0x4a/0x90 print_report+0xd2/0x620 kasan_report+0xd1/0x110 __asan_load4+0x81/0xa0 array_map_update_elem+0x48/0x3e0 bpf_prog_be94a9f26772f5b7_access_map_in_array+0xe6/0xf6 trace_call_bpf+0x1aa/0x580 kprobe_perf_func+0xdd/0x430 kprobe_dispatcher+0xa0/0xb0 kprobe_ftrace_handler+0x18b/0x2e0 0xffffffffc02280f7 RIP: 0010:__x64_sys_getpgid+0x1/0x30 ...... Allocated by task 1857: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_save_alloc_info+0x1e/0x30 __kasan_kmalloc+0x98/0xa0 __kmalloc_node+0x6a/0x150 __bpf_map_area_alloc+0x141/0x170 bpf_map_area_alloc+0x10/0x20 array_map_alloc+0x11f/0x310 map_create+0x28a/0xb40 __sys_bpf+0x753/0x37c0 __x64_sys_bpf+0x44/0x60 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Freed by task 11: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_save_free_info+0x2b/0x50 __kasan_slab_free+0x113/0x190 slab_free_freelist_hook+0xd7/0x1e0 __kmem_cache_free+0x170/0x260 kfree+0x9b/0x160 kvfree+0x2d/0x40 bpf_map_area_free+0xe/0x20 array_map_free+0x120/0x2c0 bpf_map_free_deferred+0xd7/0x1e0 process_one_work+0x462/0x990 worker_thread+0x370/0x670 kthread+0x1b0/0x200 ret_from_fork+0x3a/0x70 ret_from_fork_asm+0x1b/0x30 Last potentially related work creation: kasan_save_stack+0x26/0x50 __kasan_record_aux_stack+0x94/0xb0 kasan_record_aux_stack_noalloc+0xb/0x20 __queue_work+0x331/0x950 queue_work_on+0x75/0x80 bpf_map_put+0xfa/0x160 bpf_map_fd_put_ptr+0xe/0x20 bpf_fd_array_map_update_elem+0x174/0x1b0 bpf_map_update_value+0x2b7/0x4a0 __sys_bpf+0x2551/0x37c0 __x64_sys_bpf+0x44/0x60 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231204140425.1480317-7-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/map_in_map.c | 141 +++++++++++++++++++++ .../selftests/bpf/progs/access_map_in_map.c | 93 ++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_in_map.c create mode 100644 tools/testing/selftests/bpf/progs/access_map_in_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c new file mode 100644 index 000000000000..d2a10eb4e5b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include "access_map_in_map.skel.h" + +struct thread_ctx { + pthread_barrier_t barrier; + int outer_map_fd; + int start, abort; + int loop, err; +}; + +static int wait_for_start_or_abort(struct thread_ctx *ctx) +{ + while (!ctx->start && !ctx->abort) + usleep(1); + return ctx->abort ? -1 : 0; +} + +static void *update_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop, err = 0; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + int fd, zero = 0; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (fd < 0) { + err |= 1; + pthread_barrier_wait(&ctx->barrier); + continue; + } + + /* Remove the old inner map */ + if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0) + err |= 2; + close(fd); + pthread_barrier_wait(&ctx->barrier); + } + + ctx->err = err; + + return NULL; +} + +static void *access_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + /* Access the old inner map */ + syscall(SYS_getpgid); + pthread_barrier_wait(&ctx->barrier); + } + + return NULL; +} + +static void test_map_in_map_access(const char *prog_name, const char *map_name) +{ + struct access_map_in_map *skel; + struct bpf_map *outer_map; + struct bpf_program *prog; + struct thread_ctx ctx; + pthread_t tid[2]; + int err; + + skel = access_map_in_map__open(); + if (!ASSERT_OK_PTR(skel, "access_map_in_map open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "find program")) + goto out; + bpf_program__set_autoload(prog, true); + + outer_map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(outer_map, "find map")) + goto out; + + err = access_map_in_map__load(skel); + if (!ASSERT_OK(err, "access_map_in_map load")) + goto out; + + err = access_map_in_map__attach(skel); + if (!ASSERT_OK(err, "access_map_in_map attach")) + goto out; + + skel->bss->tgid = getpid(); + + memset(&ctx, 0, sizeof(ctx)); + pthread_barrier_init(&ctx.barrier, NULL, 2); + ctx.outer_map_fd = bpf_map__fd(outer_map); + ctx.loop = 4; + + err = pthread_create(&tid[0], NULL, update_map_fn, &ctx); + if (!ASSERT_OK(err, "close_thread")) + goto out; + + err = pthread_create(&tid[1], NULL, access_map_fn, &ctx); + if (!ASSERT_OK(err, "read_thread")) { + ctx.abort = 1; + pthread_join(tid[0], NULL); + goto out; + } + + ctx.start = 1; + pthread_join(tid[0], NULL); + pthread_join(tid[1], NULL); + + ASSERT_OK(ctx.err, "err"); +out: + access_map_in_map__destroy(skel); +} + +void test_map_in_map(void) +{ + if (test__start_subtest("acc_map_in_array")) + test_map_in_map_access("access_map_in_array", "outer_array_map"); + if (test__start_subtest("sleepable_acc_map_in_array")) + test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map"); + if (test__start_subtest("acc_map_in_htab")) + test_map_in_map_access("access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("sleepable_acc_map_in_htab")) + test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); +} + diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c new file mode 100644 index 000000000000..1126871c2ebd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int tgid = 0; + +static int acc_map_in_map(void *outer_map) +{ + int i, key, value = 0xdeadbeef; + void *inner_map; + + if ((bpf_get_current_pid_tgid() >> 32) != tgid) + return 0; + + /* Find nonexistent inner map */ + key = 1; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (inner_map) + return 0; + + /* Find the old inner map */ + key = 0; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (!inner_map) + return 0; + + /* Wait for the old inner map to be replaced */ + for (i = 0; i < 2048; i++) + bpf_map_update_elem(inner_map, &key, &value, 0); + + return 0; +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} -- cgit v1.2.3 From e3dd40828534a67931e0dd00fcd35846271fd4e8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Dec 2023 22:04:25 +0800 Subject: selftests/bpf: Test outer map update operations in syscall program Syscall program is running with rcu_read_lock_trace being held, so if bpf_map_update_elem() or bpf_map_delete_elem() invokes synchronize_rcu_tasks_trace() when operating on an outer map, there will be dead-lock, so add a test to guarantee that it is dead-lock free. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231204140425.1480317-8-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/syscall.c | 30 +++++++- tools/testing/selftests/bpf/progs/syscall.c | 96 ++++++++++++++++++++++-- 2 files changed, 119 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index f4d40001155a..0be8301c0ffd 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -12,7 +12,7 @@ struct args { int btf_fd; }; -void test_syscall(void) +static void test_syscall_load_prog(void) { static char verifier_log[8192]; struct args ctx = { @@ -32,7 +32,7 @@ void test_syscall(void) if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - prog_fd = bpf_program__fd(skel->progs.bpf_prog); + prog_fd = bpf_program__fd(skel->progs.load_prog); err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); @@ -53,3 +53,29 @@ cleanup: if (ctx.btf_fd > 0) close(ctx.btf_fd); } + +static void test_syscall_update_outer_map(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct syscall *skel; + int err, prog_fd; + + skel = syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.update_outer_map); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(opts.retval, 1, "retval"); +cleanup: + syscall__destroy(skel); +} + +void test_syscall(void) +{ + if (test__start_subtest("load_prog")) + test_syscall_load_prog(); + if (test__start_subtest("update_outer_map")) + test_syscall_update_outer_map(); +} diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index e550f728962d..3d3cafdebe72 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -6,9 +6,15 @@ #include #include <../../../tools/include/linux/filter.h> #include +#include +#include char _license[] SEC("license") = "GPL"; +struct bpf_map { + int id; +} __attribute__((preserve_access_index)); + struct args { __u64 log_buf; __u32 log_size; @@ -27,6 +33,37 @@ struct args { BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, union bpf_attr); + __uint(max_entries, 1); +} bpf_attr_array SEC(".maps"); + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_load(void) { struct btf_blob { @@ -58,7 +95,7 @@ static int btf_load(void) } SEC("syscall") -int bpf_prog(struct args *ctx) +int load_prog(struct args *ctx) { static char license[] = "GPL"; static struct bpf_insn insns[] = { @@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx) map_create_attr.max_entries = ctx->max_entries; map_create_attr.btf_fd = ret; - prog_load_attr.license = (long) license; - prog_load_attr.insns = (long) insns; + prog_load_attr.license = ptr_to_u64(license); + prog_load_attr.insns = ptr_to_u64(insns); prog_load_attr.log_buf = ctx->log_buf; prog_load_attr.log_size = ctx->log_size; prog_load_attr.log_level = 1; @@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx) insns[3].imm = ret; map_update_attr.map_fd = ret; - map_update_attr.key = (long) &key; - map_update_attr.value = (long) &value; + map_update_attr.key = ptr_to_u64(&key); + map_update_attr.value = ptr_to_u64(&value); ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); if (ret < 0) return ret; @@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx) ctx->prog_fd = ret; return 1; } + +SEC("syscall") +int update_outer_map(void *ctx) +{ + int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err; + const int attr_sz = sizeof(union bpf_attr); + union bpf_attr *attr; + + attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero); + if (!attr) + goto out; + + memset(attr, 0, attr_sz); + attr->map_id = ((struct bpf_map *)&outer_array_map)->id; + outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz); + if (outer_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_type = BPF_MAP_TYPE_ARRAY; + attr->key_size = 4; + attr->value_size = 4; + attr->max_entries = 1; + inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz); + if (inner_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + attr->value = ptr_to_u64(&inner_fd); + err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz); + if (err) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz); + if (err) + goto out; + ret = 1; +out: + if (inner_fd >= 0) + bpf_sys_close(inner_fd); + if (outer_fd >= 0) + bpf_sys_close(outer_fd); + return ret; +} -- cgit v1.2.3 From bc877956272f0521fef107838555817112a450dc Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:29 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for queue Add support in netlink spec(netdev.yaml) for queue information. Add code generated from the spec. Note: The "queue-type" attribute takes values 0 and 1 for rx and tx queue type respectively. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147330963.5260.2576294626647300472.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 52 +++++++++++ include/uapi/linux/netdev.h | 16 ++++ net/core/netdev-genl-gen.c | 26 ++++++ net/core/netdev-genl-gen.h | 3 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 16 ++++ tools/net/ynl/generated/netdev-user.c | 153 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 99 +++++++++++++++++++++ 8 files changed, 375 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..719e6aafbfdb 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -66,6 +66,10 @@ definitions: name: tx-checksum doc: L3 checksum HW offload is supported by the driver. + - + name: queue-type + type: enum + entries: [ rx, tx ] attribute-sets: - @@ -209,6 +213,31 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: queue + attributes: + - + name: id + doc: Queue index; most queue types are indexed like a C array, with + indexes starting at 0 and ending at queue count - 1. Queue indexes + are scoped to an interface and queue type. + type: u32 + - + name: ifindex + doc: ifindex of the netdevice to which the queue belongs. + type: u32 + checks: + min: 1 + - + name: type + doc: Queue type as rx, tx. Each queue type defines a separate ID space. + type: u32 + enum: queue-type + - + name: napi-id + doc: ID of the NAPI instance which services this queue. + type: u32 + operations: list: - @@ -307,6 +336,29 @@ operations: dump: reply: *pp-stats-reply config-cond: page-pool-stats + - + name: queue-get + doc: Get queue information from the kernel. + Only configured queues will be reported (as opposed to all available + hardware queues). + attribute-set: queue + do: + request: + attributes: + - ifindex + - type + - id + reply: &queue-get-op + attributes: + - id + - type + - napi-id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *queue-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index dccd8c3a141e..b1dcf88c82cf 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -46,6 +46,18 @@ static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAG }; #endif /* CONFIG_PAGE_POOL_STATS */ +/* NETDEV_CMD_QUEUE_GET - do */ +static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_QUEUE_GET - dump */ +static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -88,6 +100,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .flags = GENL_CMD_CAP_DUMP, }, #endif /* CONFIG_PAGE_POOL_STATS */ + { + .cmd = NETDEV_CMD_QUEUE_GET, + .doit = netdev_nl_queue_get_doit, + .policy = netdev_queue_get_do_nl_policy, + .maxattr = NETDEV_A_QUEUE_TYPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_QUEUE_GET, + .dumpit = netdev_nl_queue_get_dumpit, + .policy = netdev_queue_get_dump_nl_policy, + .maxattr = NETDEV_A_QUEUE_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 649e4b46eccf..086623c1797a 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -23,6 +23,9 @@ int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 10f2124e9e23..35e2d692f651 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -140,6 +140,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..fbf7e24ade91 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -23,6 +23,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", + [NETDEV_CMD_QUEUE_GET] = "queue-get", }; const char *netdev_op_str(int op) @@ -76,6 +77,18 @@ const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) return netdev_xsk_flags_strmap[value]; } +static const char * const netdev_queue_type_strmap[] = { + [0] = "rx", + [1] = "tx", +}; + +const char *netdev_queue_type_str(enum netdev_queue_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_queue_type_strmap)) + return NULL; + return netdev_queue_type_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, @@ -135,6 +148,18 @@ struct ynl_policy_nest netdev_page_pool_stats_nest = { .table = netdev_page_pool_stats_policy, }; +struct ynl_policy_attr netdev_queue_policy[NETDEV_A_QUEUE_MAX + 1] = { + [NETDEV_A_QUEUE_ID] = { .name = "id", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_TYPE] = { .name = "type", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_queue_nest = { + .max_attr = NETDEV_A_QUEUE_MAX, + .table = netdev_queue_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -617,6 +642,134 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +void netdev_queue_get_req_free(struct netdev_queue_get_req *req) +{ + free(req); +} + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_queue_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_queue_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_QUEUE_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.type = 1; + dst->type = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.napi_id = 1; + dst->napi_id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_queue_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + yrs.yarg.rsp_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); + if (req->_present.type) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, req->type); + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_queue_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_QUEUE_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_queue_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_QUEUE_GET - dump */ +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp) +{ + struct netdev_queue_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_queue_get_list); + yds.cb = netdev_queue_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_QUEUE_GET; + yds.rsp_policy = &netdev_queue_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_queue_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index cc3d80d1cf8c..d7daf6df3df0 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -20,6 +20,7 @@ const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); +const char *netdev_queue_type_str(enum netdev_queue_type value); /* Common nested types */ struct netdev_page_pool_info { @@ -261,4 +262,102 @@ netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp struct netdev_page_pool_stats_get_list * netdev_page_pool_stats_get_dump(struct ynl_sock *ys); +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +struct netdev_queue_get_req { + struct { + __u32 ifindex:1; + __u32 type:1; + __u32 id:1; + } _present; + + __u32 ifindex; + enum netdev_queue_type type; + __u32 id; +}; + +static inline struct netdev_queue_get_req *netdev_queue_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req)); +} +void netdev_queue_get_req_free(struct netdev_queue_get_req *req); + +static inline void +netdev_queue_get_req_set_ifindex(struct netdev_queue_get_req *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} +static inline void +netdev_queue_get_req_set_type(struct netdev_queue_get_req *req, + enum netdev_queue_type type) +{ + req->_present.type = 1; + req->type = type; +} +static inline void +netdev_queue_get_req_set_id(struct netdev_queue_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_queue_get_rsp { + struct { + __u32 id:1; + __u32 type:1; + __u32 napi_id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + enum netdev_queue_type type; + __u32 napi_id; + __u32 ifindex; +}; + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp); + +/* + * Get queue information from the kernel. Only configured queues will be reported (as opposed to all available hardware queues). + */ +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req); + +/* NETDEV_CMD_QUEUE_GET - dump */ +struct netdev_queue_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_queue_get_req_dump * +netdev_queue_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req_dump)); +} +void netdev_queue_get_req_dump_free(struct netdev_queue_get_req_dump *req); + +static inline void +netdev_queue_get_req_dump_set_ifindex(struct netdev_queue_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_queue_get_list { + struct netdev_queue_get_list *next; + struct netdev_queue_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp); + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.2.3 From ff9991499fb53575c45eb92cd064bcd7141bb572 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:51 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for NAPI Add support in netlink spec(netdev.yaml) for napi related information. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147333119.5260.7050639053080529108.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 30 ++++++++ include/uapi/linux/netdev.h | 9 +++ net/core/netdev-genl-gen.c | 24 +++++++ net/core/netdev-genl-gen.h | 2 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 9 +++ tools/net/ynl/generated/netdev-user.c | 124 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 75 +++++++++++++++++++ 8 files changed, 283 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 719e6aafbfdb..76d6b2e15b67 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -213,6 +213,19 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: napi + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which NAPI instance belongs. + type: u32 + checks: + min: 1 + - + name: id + doc: ID of the NAPI instance. + type: u32 - name: queue attributes: @@ -359,6 +372,23 @@ operations: attributes: - ifindex reply: *queue-get-op + - + name: napi-get + doc: Get information about NAPI instances configured on the system. + attribute-set: napi + do: + request: + attributes: + - id + reply: &napi-get-op + attributes: + - id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *napi-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index b1dcf88c82cf..be7f2ebd61b2 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -58,6 +58,16 @@ static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IF [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_NAPI_GET - do */ +static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_NAPI_GET - dump */ +static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -114,6 +124,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_QUEUE_IFINDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .doit = netdev_nl_napi_get_doit, + .policy = netdev_napi_get_do_nl_policy, + .maxattr = NETDEV_A_NAPI_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .dumpit = netdev_nl_napi_get_dumpit, + .policy = netdev_napi_get_dump_nl_policy, + .maxattr = NETDEV_A_NAPI_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 086623c1797a..a47f2bcbe4fa 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -26,6 +26,8 @@ int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 3bc1661e6ebf..4c8ea6a4f015 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -155,6 +155,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index fbf7e24ade91..906b61554698 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -24,6 +24,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", [NETDEV_CMD_QUEUE_GET] = "queue-get", + [NETDEV_CMD_NAPI_GET] = "napi-get", }; const char *netdev_op_str(int op) @@ -160,6 +161,16 @@ struct ynl_policy_nest netdev_queue_nest = { .table = netdev_queue_policy, }; +struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_napi_nest = { + .max_attr = NETDEV_A_NAPI_MAX, + .table = netdev_napi_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -770,6 +781,119 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +void netdev_napi_get_req_free(struct netdev_napi_get_req *req) +{ + free(req); +} + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_napi_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_napi_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_napi_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + yrs.yarg.rsp_policy = &netdev_napi_nest; + + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_napi_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_NAPI_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_napi_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_NAPI_GET - dump */ +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp) +{ + struct netdev_napi_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_napi_get_list); + yds.cb = netdev_napi_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_NAPI_GET; + yds.rsp_policy = &netdev_napi_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_IFINDEX, req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_napi_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index d7daf6df3df0..481c9e45b689 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -360,4 +360,79 @@ struct netdev_queue_get_list * netdev_queue_get_dump(struct ynl_sock *ys, struct netdev_queue_get_req_dump *req); +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +struct netdev_napi_get_req { + struct { + __u32 id:1; + } _present; + + __u32 id; +}; + +static inline struct netdev_napi_get_req *netdev_napi_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req)); +} +void netdev_napi_get_req_free(struct netdev_napi_get_req *req); + +static inline void +netdev_napi_get_req_set_id(struct netdev_napi_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_napi_get_rsp { + struct { + __u32 id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + __u32 ifindex; +}; + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); + +/* + * Get information about NAPI instances configured on the system. + */ +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req); + +/* NETDEV_CMD_NAPI_GET - dump */ +struct netdev_napi_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_napi_get_req_dump * +netdev_napi_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req_dump)); +} +void netdev_napi_get_req_dump_free(struct netdev_napi_get_req_dump *req); + +static inline void +netdev_napi_get_req_dump_set_ifindex(struct netdev_napi_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_napi_get_list { + struct netdev_napi_get_list *next; + struct netdev_napi_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp); + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.2.3 From 5a5131d66fe02337de0b1b2e021b58f0f55c6df5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:02 -0800 Subject: netdev-genl: spec: Add irq in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for interrupt number among the NAPI attributes. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147334210.5260.18178387869057516983.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 5 +++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 15 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 76d6b2e15b67..a3a1c6ad521b 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -226,6 +226,10 @@ attribute-sets: name: id doc: ID of the NAPI instance. type: u32 + - + name: irq + doc: The associated interrupt vector number for the napi + type: u32 - name: queue attributes: @@ -384,6 +388,7 @@ operations: attributes: - id - ifindex + - irq dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 906b61554698..58e5196da4bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -164,6 +164,7 @@ struct ynl_policy_nest netdev_queue_nest = { struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -210,6 +211,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.ifindex = 1; dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IRQ) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.irq = 1; + dst->irq = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 481c9e45b689..0c3224017c12 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -387,10 +387,12 @@ struct netdev_napi_get_rsp { struct { __u32 id:1; __u32 ifindex:1; + __u32 irq:1; } _present; __u32 id; __u32 ifindex; + __u32 irq; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.2.3 From 8481a249a0eaf0000dbb18f7689ccd50ea9835cd Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:13 -0800 Subject: netdev-genl: spec: Add PID in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for PID of the NAPI thread. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147335301.5260.11872351477120434501.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 17 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index a3a1c6ad521b..f2c76d103bd8 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -230,6 +230,12 @@ attribute-sets: name: irq doc: The associated interrupt vector number for the napi type: u32 + - + name: pid + doc: PID of the napi thread, if NAPI is configured to operate in + threaded mode. If NAPI is not in threaded mode (i.e. uses normal + softirq context), the attribute will be absent. + type: u32 - name: queue attributes: @@ -389,6 +395,7 @@ operations: - id - ifindex - irq + - pid dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 58e5196da4bd..ed8bcb855a1d 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -165,6 +165,7 @@ struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_PID] = { .name = "pid", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -216,6 +217,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.irq = 1; dst->irq = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_PID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.pid = 1; + dst->pid = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 0c3224017c12..3830cf2ab6b8 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -388,11 +388,13 @@ struct netdev_napi_get_rsp { __u32 id:1; __u32 ifindex:1; __u32 irq:1; + __u32 pid:1; } _present; __u32 id; __u32 ifindex; __u32 irq; + __u32 pid; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.2.3 From 25ae948b447881bf689d459cd5bd4629d9c04b20 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:57 +0800 Subject: selftests/net: add lib.sh Add a lib.sh for net selftests. This file can be used to define commonly used variables and functions. Some commonly used functions can be moved from forwarding/lib.sh to this lib file. e.g. busywait(). Add function setup_ns() for user to create unique namespaces with given prefix name. Reviewed-by: Petr Machata Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/forwarding/lib.sh | 27 +-------- tools/testing/selftests/net/lib.sh | 85 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 tools/testing/selftests/net/lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9274edfb76ff..14bd68da7466 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -54,7 +54,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e37a15eda6c2..8f6ca458af9a 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -4,9 +4,6 @@ ############################################################################## # Defines -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - # Can be overridden by the configuration file. PING=${PING:=ping} PING6=${PING6:=ping6} @@ -41,6 +38,7 @@ if [[ -f $relative_path/forwarding.config ]]; then source "$relative_path/forwarding.config" fi +source ../lib.sh ############################################################################## # Sanity checks @@ -395,29 +393,6 @@ log_info() echo "INFO: $msg" } -busywait() -{ - local timeout=$1; shift - - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi - - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - done -} - not() { "$@" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh new file mode 100644 index 000000000000..518eca57b815 --- /dev/null +++ b/tools/testing/selftests/net/lib.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +############################################################################## +# Helpers +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +cleanup_ns() +{ + local ns="" + local errexit=0 + local ret=0 + + # disable errexit temporary + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + for ns in "$@"; do + ip netns delete "${ns}" &> /dev/null + if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + + [ $errexit -eq 1 ] && set -e + return $ret +} + +# setup netns with given names as prefix. e.g +# setup_ns local remote +setup_ns() +{ + local ns="" + local ns_name="" + local ns_list="" + for ns_name in "$@"; do + # Some test may setup/remove same netns multi times + if unset ${ns_name} 2> /dev/null; then + ns="${ns_name,,}-$(mktemp -u XXXXXX)" + eval readonly ${ns_name}="$ns" + else + eval ns='$'${ns_name} + cleanup_ns "$ns" + + fi + + if ! ip netns add "$ns"; then + echo "Failed to create namespace $ns_name" + cleanup_ns "$ns_list" + return $ksft_skip + fi + ip -n "$ns" link set lo up + ns_list="$ns_list $ns" + done +} -- cgit v1.2.3 From 64227511ad57796fac514ad8df6f2830cc887563 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:58 +0800 Subject: selftests/net: convert arp_ndisc_evict_nocarrier.sh to run it in unique namespace Here is the test result after conversion. ]# ./arp_ndisc_evict_nocarrier.sh run arp_evict_nocarrier=1 test ok run arp_evict_nocarrier=0 test ok run all.arp_evict_nocarrier=0 test ok run ndisc_evict_nocarrier=1 test ok run ndisc_evict_nocarrier=0 test ok run all.ndisc_evict_nocarrier=0 test ok Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/arp_ndisc_evict_nocarrier.sh | 46 ++++++++-------------- 1 file changed, 16 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 4a110bb01e53..92eb880c52f2 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -12,7 +12,8 @@ # {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry # -readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +source lib.sh + readonly V4_ADDR0=10.0.10.1 readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 @@ -22,43 +23,29 @@ ret=0 cleanup_v6() { - ip netns del me - ip netns del peer + cleanup_ns ${me} ${peer} sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } -create_ns() -{ - local n=${1} - - ip netns del ${n} 2>/dev/null - - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} link set lo up -} - - setup_v6() { - create_ns me - create_ns peer + setup_ns me peer - IP="ip -netns me" + IP="ip -netns ${me}" $IP li add veth1 type veth peer name veth2 $IP li set veth1 up $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad - $IP li set veth2 netns peer up - ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad + $IP li set veth2 netns ${peer} up + ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad - ip netns exec me sysctl -w $1 >/dev/null 2>&1 + ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1 # Establish an ND cache entry - ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 # Should have the veth1 entry in ND table - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -ne 0 ]; then cleanup_v6 echo "failed" @@ -66,11 +53,11 @@ setup_v6() { fi # Set veth2 down, which will put veth1 in NOCARRIER state - ip netns exec peer ip link set veth2 down + ip netns exec ${peer} ip link set veth2 down } setup_v4() { - ip netns add "${PEER_NS}" + setup_ns PEER_NS ip link add name veth0 type veth peer name veth1 ip link set dev veth0 up ip link set dev veth1 netns "${PEER_NS}" @@ -99,8 +86,7 @@ setup_v4() { cleanup_v4() { ip neigh flush dev veth0 ip link del veth0 - local -r ns="$(ip netns list|grep $PEER_NS)" - [ -n "$ns" ] && ip netns del $ns 2>/dev/null + cleanup_ns $PEER_NS sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 @@ -163,7 +149,7 @@ run_ndisc_evict_nocarrier_enabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "failed" @@ -180,7 +166,7 @@ run_ndisc_evict_nocarrier_disabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" @@ -197,7 +183,7 @@ run_ndisc_evict_nocarrier_disabled_all() { setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" -- cgit v1.2.3 From 7f770d28f2e5abfd442ad689ba1129dd66593529 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:59 +0800 Subject: selftests/net: specify the interface when do arping When do arping, the interface need to be specified. Or we will get error: Interface "lo" is not ARPable. And the test failed. ]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [FAIL] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [FAIL] After fix: ]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [ OK ] Fixes: 0ea7b0a454ca ("selftests: net: arp_ndisc_untracked_subnets: test for arp_accept and accept_untracked_na") Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index c899b446acb6..327427ec10f5 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -150,7 +150,7 @@ arp_test_gratuitous() { fi # Supply arp_accept option to set up which sets it in sysctl setup ${arp_accept} - ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null + ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null if verify_arp $1 $2; then printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" -- cgit v1.2.3 From 3a0f3367006f7cb4203e8eecc77ce7d63190a1df Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:00 +0800 Subject: selftests/net: convert arp_ndisc_untracked_subnets.sh to run it in unique namespace Here is the test result after conversion. 2 tests also failed without this patch ]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [ OK ] TEST: test_ndisc: accept_untracked_na=0 [ OK ] TEST: test_ndisc: accept_untracked_na=1 [ OK ] TEST: test_ndisc: accept_untracked_na=2 same_subnet=0 [ OK ] TEST: test_ndisc: accept_untracked_na=2 same_subnet=1 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/arp_ndisc_untracked_subnets.sh | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index 327427ec10f5..a40c0e9bd023 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -5,16 +5,14 @@ # garp to the router. Router accepts or ignores based on its arp_accept # or accept_untracked_na configuration. +source lib.sh + TESTS="arp ndisc" -ROUTER_NS="ns-router" -ROUTER_NS_V6="ns-router-v6" ROUTER_INTF="veth-router" ROUTER_ADDR="10.0.10.1" ROUTER_ADDR_V6="2001:db8:abcd:0012::1" -HOST_NS="ns-host" -HOST_NS_V6="ns-host-v6" HOST_INTF="veth-host" HOST_ADDR="10.0.10.2" HOST_ADDR_V6="2001:db8:abcd:0012::2" @@ -23,13 +21,11 @@ SUBNET_WIDTH=24 PREFIX_WIDTH_V6=64 cleanup() { - ip netns del ${HOST_NS} - ip netns del ${ROUTER_NS} + cleanup_ns ${HOST_NS} ${ROUTER_NS} } cleanup_v6() { - ip netns del ${HOST_NS_V6} - ip netns del ${ROUTER_NS_V6} + cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6} } setup() { @@ -37,8 +33,7 @@ setup() { local arp_accept=$1 # Set up two namespaces - ip netns add ${ROUTER_NS} - ip netns add ${HOST_NS} + setup_ns HOST_NS ROUTER_NS # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. @@ -72,8 +67,7 @@ setup_v6() { local accept_untracked_na=$1 # Set up two namespaces - ip netns add ${ROUTER_NS_V6} - ip netns add ${HOST_NS_V6} + setup_ns HOST_NS_V6 ROUTER_NS_V6 # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. -- cgit v1.2.3 From 7c16d485fec5d0010b992e75ad996e7382950879 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:01 +0800 Subject: selftests/net: convert cmsg tests to make them run in unique namespace Here is the test result after conversion. ]# ./cmsg_ipv6.sh OK ]# ./cmsg_so_mark.sh OK ]# ./cmsg_time.sh OK Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/cmsg_ipv6.sh | 10 ++++------ tools/testing/selftests/net/cmsg_so_mark.sh | 7 ++++--- tools/testing/selftests/net/cmsg_time.sh | 7 ++++--- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 330d0b1ceced..f30bd57d5e38 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -1,9 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ksft_skip=4 +source lib.sh -NS=ns IP6=2001:db8:1::1/64 TGT6=2001:db8:1::2 TMPF=$(mktemp --suffix ".pcap") @@ -11,13 +10,11 @@ TMPF=$(mktemp --suffix ".pcap") cleanup() { rm -f $TMPF - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT -NSEXE="ip netns exec $NS" - tcpdump -h | grep immediate-mode >> /dev/null if [ $? -ne 0 ]; then echo "SKIP - tcpdump with --immediate-mode option required" @@ -25,7 +22,8 @@ if [ $? -ne 0 ]; then fi # Namespaces -ip netns add $NS +setup_ns NS +NSEXE="ip netns exec $NS" $NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh index 1650b8622f2f..772ad0cc2630 100755 --- a/tools/testing/selftests/net/cmsg_so_mark.sh +++ b/tools/testing/selftests/net/cmsg_so_mark.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -10,13 +11,13 @@ MARK=1000 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 91161e1da734..af85267ad1e3 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -9,13 +10,13 @@ TGT6=2001:db8:1::2 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null -- cgit v1.2.3 From 0d8b488792e4f2e26f54248f8a0380b73e1ff992 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:02 +0800 Subject: selftests/net: convert drop_monitor_tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./drop_monitor_tests.sh Software drops test TEST: Capturing active software drops [ OK ] TEST: Capturing inactive software drops [ OK ] Hardware drops test TEST: Capturing active hardware drops [ OK ] TEST: Capturing inactive hardware drops [ OK ] Tests passed: 4 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/drop_monitor_tests.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index b7650e30d18b..7c4818c971fc 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking drop monitor functionality. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -13,10 +11,6 @@ TESTS=" hw_drops " -IP="ip -netns ns1" -TC="tc -netns ns1" -DEVLINK="devlink -N ns1" -NS_EXEC="ip netns exec ns1" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} @@ -43,7 +37,7 @@ setup() modprobe netdevsim &> /dev/null set -e - ip netns add ns1 + setup_ns NS1 $IP link add dummy10 up type dummy $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device @@ -57,7 +51,7 @@ setup() cleanup() { $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device - ip netns del ns1 + cleanup_ns ${NS1} } sw_drops_test() @@ -194,8 +188,15 @@ if [ $? -ne 0 ]; then exit $ksft_skip fi -# start clean +# create netns first so we can get the namespace name +setup_ns NS1 cleanup &> /dev/null +trap cleanup EXIT + +IP="ip -netns ${NS1}" +TC="tc -netns ${NS1}" +DEVLINK="devlink -N ${NS1}" +NS_EXEC="ip netns exec ${NS1}" for t in $TESTS do -- cgit v1.2.3 From baf37f213c88ae9e620195f34509a9a4be7ffccd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:03 +0800 Subject: selftests/net: convert traceroute.sh to run it in unique namespace Here is the test result after conversion. ]# ./traceroute.sh TEST: IPV6 traceroute [ OK ] TEST: IPV4 traceroute [ OK ] Tests passed: 2 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/traceroute.sh | 82 ++++++++++++++----------------- 1 file changed, 36 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index de9ca97abc30..282f14760940 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -4,6 +4,7 @@ # Run traceroute/traceroute6 tests # +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -69,9 +70,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip netns exec ${ns} ip link set lo up if [ "${addr}" != "-" ]; then ip netns exec ${ns} ip addr add dev lo ${addr} fi @@ -160,12 +158,7 @@ connect_ns() cleanup_traceroute6() { - local ns - - for ns in host-1 host-2 router-1 router-2 - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_traceroute6() @@ -176,33 +169,34 @@ setup_traceroute6() cleanup_traceroute6 set -e - create_ns host-1 - create_ns host-2 - create_ns router-1 - create_ns router-2 + setup_ns h1 h2 r1 r2 + create_ns $h1 + create_ns $h2 + create_ns $r1 + create_ns $r2 # Setup N3 - connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 - ip netns exec host-2 ip route add default via 2000:103::2 + connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64 + ip netns exec $h2 ip route add default via 2000:103::2 # Setup N2 - connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 - ip netns exec router-1 ip route add default via 2000:102::2 + connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64 + ip netns exec $r1 ip route add default via 2000:102::2 # Setup N1. host-1 and router-2 connect to a bridge in router-1. - ip netns exec router-1 ip link add name ${brdev} type bridge - ip netns exec router-1 ip link set ${brdev} up - ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + ip netns exec $r1 ip link add name ${brdev} type bridge + ip netns exec $r1 ip link set ${brdev} up + ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev} - connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - - ip netns exec router-1 ip link set dev eth0 master ${brdev} - ip netns exec host-1 ip route add default via 2000:101::1 + connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - - + ip netns exec $r1 ip link set dev eth0 master ${brdev} + ip netns exec $h1 ip route add default via 2000:101::1 - connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - - ip netns exec router-1 ip link set dev eth1 master ${brdev} + connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - - + ip netns exec $r1 ip link set dev eth1 master ${brdev} # Prime the network - ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1 set +e } @@ -217,7 +211,7 @@ run_traceroute6() setup_traceroute6 # traceroute6 host-2 from host-1 (expects 2000:102::2) - run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" log_test $? 0 "IPV6 traceroute" cleanup_traceroute6 @@ -240,12 +234,7 @@ run_traceroute6() cleanup_traceroute() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $router } setup_traceroute() @@ -254,24 +243,25 @@ setup_traceroute() cleanup_traceroute set -e - create_ns host-1 - create_ns host-2 - create_ns router + setup_ns h1 h2 router + create_ns $h1 + create_ns $h2 + create_ns $router - connect_ns host-1 eth0 1.0.1.3/24 - \ - router eth1 1.0.3.1/24 - - ip netns exec host-1 ip route add default via 1.0.1.1 + connect_ns $h1 eth0 1.0.1.3/24 - \ + $router eth1 1.0.3.1/24 - + ip netns exec $h1 ip route add default via 1.0.1.1 - ip netns exec router ip addr add 1.0.1.1/24 dev eth1 - ip netns exec router sysctl -qw \ + ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec $router sysctl -qw \ net.ipv4.icmp_errors_use_inbound_ifaddr=1 - connect_ns host-2 eth0 1.0.2.4/24 - \ - router eth2 1.0.2.1/24 - - ip netns exec host-2 ip route add default via 1.0.2.1 + connect_ns $h2 eth0 1.0.2.4/24 - \ + $router eth2 1.0.2.1/24 - + ip netns exec $h2 ip route add default via 1.0.2.1 # Prime the network - ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1 set +e } @@ -286,7 +276,7 @@ run_traceroute() setup_traceroute # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" log_test $? 0 "IPV4 traceroute" cleanup_traceroute -- cgit v1.2.3 From c1516b3563aca82a35277dc8999370868cf20175 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:04 +0800 Subject: selftests/net: convert icmp_redirect.sh to run it in unique namespace Here is the test result after conversion. # ./icmp_redirect.sh ########################################################################### Legacy routing ########################################################################### TEST: IPv4: redirect exception [ OK ] ... TEST: IPv4: mtu exception plus redirect [ OK ] TEST: IPv6: mtu exception plus redirect [ OK ] Tests passed: 40 Tests failed: 0 Tests xfailed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/icmp_redirect.sh | 182 +++++++++++++-------------- 1 file changed, 88 insertions(+), 94 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index 7b9d6e31b8e7..d6f0e449c029 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -19,6 +19,7 @@ # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent # from r1 to h1 telling h1 to use r2 when talking to h2. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -140,11 +141,7 @@ get_linklocal() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } create_vrf() @@ -171,102 +168,99 @@ setup() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns li set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 - - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 + + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 + fi done # # create interconnects # - ip -netns h1 li add eth0 type veth peer name r1h1 - ip -netns h1 li set r1h1 netns r1 name eth0 up + ip -netns $h1 li add eth0 type veth peer name r1h1 + ip -netns $h1 li set r1h1 netns $r1 name eth0 up - ip -netns h1 li add eth1 type veth peer name r2h1 - ip -netns h1 li set r2h1 netns r2 name eth0 up + ip -netns $h1 li add eth1 type veth peer name r2h1 + ip -netns $h1 li set r2h1 netns $r2 name eth0 up - ip -netns h2 li add eth0 type veth peer name r2h2 - ip -netns h2 li set eth0 up - ip -netns h2 li set r2h2 netns r2 name eth2 up + ip -netns $h2 li add eth0 type veth peer name r2h2 + ip -netns $h2 li set eth0 up + ip -netns $h2 li set r2h2 netns $r2 name eth2 up - ip -netns r1 li add eth1 type veth peer name r2r1 - ip -netns r1 li set eth1 up - ip -netns r1 li set r2r1 netns r2 name eth1 up + ip -netns $r1 li add eth1 type veth peer name r2r1 + ip -netns $r1 li set eth1 up + ip -netns $r1 li set r2r1 netns $r2 name eth1 up # # h1 # if [ "${WITH_VRF}" = "yes" ]; then - create_vrf "h1" + create_vrf "$h1" H1_VRF_ARG="vrf ${VRF}" H1_PING_ARG="-I ${VRF}" else H1_VRF_ARG= H1_PING_ARG= fi - ip -netns h1 li add br0 type bridge + ip -netns $h1 li add br0 type bridge if [ "${WITH_VRF}" = "yes" ]; then - ip -netns h1 li set br0 vrf ${VRF} up + ip -netns $h1 li set br0 vrf ${VRF} up else - ip -netns h1 li set br0 up + ip -netns $h1 li set br0 up fi - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 li set eth0 master br0 up - ip -netns h1 li set eth1 master br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 li set eth0 master br0 up + ip -netns $h1 li set eth1 master br0 up # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0 # # r1 # - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 - ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30 + ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 - ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad - ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30 + ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad + ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad sleep 2 - R1_LLADDR=$(get_linklocal r1 eth0) + R1_LLADDR=$(get_linklocal $r1 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r1's eth0" exit 1 fi log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}" - R2_LLADDR=$(get_linklocal r2 eth0) + R2_LLADDR=$(get_linklocal $r2 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r2's eth0" exit 1 @@ -278,8 +272,8 @@ change_h2_mtu() { local mtu=$1 - run_cmd ip -netns h2 li set eth0 mtu ${mtu} - run_cmd ip -netns r2 li set eth2 mtu ${mtu} + run_cmd ip -netns $h2 li set eth0 mtu ${mtu} + run_cmd ip -netns $r2 li set eth2 mtu ${mtu} } check_exception() @@ -291,40 +285,40 @@ check_exception() # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) if [ "$VERBOSE" = "1" ]; then echo "Commands to check for exception:" - run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} - run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} + run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} + run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} fi if [ -n "${mtu}" ]; then mtu=" mtu ${mtu}" fi if [ "$with_redirect" = "yes" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" elif [ -n "${mtu}" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" else # want to verify that neither mtu nor redirected appears in # the route get output. The -v will wipe out the cache line # if either are set so the last grep -q will not find a match - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi log_test $? 0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" else # IPv6 is a bit harder. First strip out the match if it # contains an mtu exception and then look for the first # gateway - R1's lladdr - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi log_test $? 0 "IPv6: ${desc}" 1 @@ -334,21 +328,21 @@ run_ping() { local sz=$1 - run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} - run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} } replace_route_new() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 } reset_route_new() { - run_cmd ip -netns r1 nexthop flush - run_cmd ip -netns h1 nexthop flush + run_cmd ip -netns $r1 nexthop flush + run_cmd ip -netns $h1 nexthop flush initial_route_new } @@ -356,34 +350,34 @@ reset_route_new() initial_route_new() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 ro add ${H2_N2} nhid 1 + run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1 - run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2 + run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2 # h1 to h2 via r1 - run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 + run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 - run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 + run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 } replace_route_legacy() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 } reset_route_legacy() { - run_cmd ip -netns r1 ro del ${H2_N2} - run_cmd ip -netns r1 -6 ro del ${H2_N2_6} + run_cmd ip -netns $r1 ro del ${H2_N2} + run_cmd ip -netns $r1 -6 ro del ${H2_N2_6} - run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2} - run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} + run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2} + run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} initial_route_legacy } @@ -391,22 +385,22 @@ reset_route_legacy() initial_route_legacy() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 # h1 to h2 via r1 # - IPv6 redirect only works if gateway is the LLA - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 } check_connectivity() { local rc - run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} rc=$? - run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} [ $? -ne 0 ] && rc=$? return $rc -- cgit v1.2.3 From 80b74bd33421ddde1b716563e2e6e0da5edc5d9b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:05 +0800 Subject: sleftests/net: convert icmp.sh to run it in unique namespace Here is the test result after conversion. ]# ./icmp.sh OK Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/icmp.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh index e4b04cd1644a..824cb0e35eff 100755 --- a/tools/testing/selftests/net/icmp.sh +++ b/tools/testing/selftests/net/icmp.sh @@ -18,8 +18,8 @@ # that address space, so the kernel should substitute the dummy address # 192.0.0.8 defined in RFC7600. -NS1=ns1 -NS2=ns2 +source lib.sh + H1_IP=172.16.0.1/32 H1_IP6=2001:db8:1::1 RT1=172.16.1.0/24 @@ -32,15 +32,13 @@ TMPFILE=$(mktemp) cleanup() { rm -f "$TMPFILE" - ip netns del $NS1 - ip netns del $NS2 + cleanup_ns $NS1 $NS2 } trap cleanup EXIT # Namespaces -ip netns add $NS1 -ip netns add $NS2 +setup_ns NS1 NS2 # Connectivity ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 -- cgit v1.2.3 From 2ab1ee827e976d411fd140225016c2e532e4df12 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:06 +0800 Subject: selftests/net: convert ioam6.sh to run it in unique namespace Here is the test result after conversion. ]# ./ioam6.sh -------------------------------------------------------------------------- OUTPUT tests -------------------------------------------------------------------------- TEST: Unknown IOAM namespace (inline mode) [ OK ] TEST: Unknown IOAM namespace (encap mode) [ OK ] TEST: Missing trace room (inline mode) [ OK ] TEST: Missing trace room (encap mode) [ OK ] TEST: Trace type with bit 0 only (inline mode) [ OK ] ... TEST: Full supported trace (encap mode) [ OK ] -------------------------------------------------------------------------- GLOBAL tests -------------------------------------------------------------------------- TEST: Forward - Full supported trace (inline mode) [ OK ] TEST: Forward - Full supported trace (encap mode) [ OK ] - Tests passed: 88 - Tests failed: 0 Acked-by: David Ahern Reviewed-by: Justin Iurman Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/ioam6.sh | 247 +++++++++++++++++------------------ 1 file changed, 121 insertions(+), 126 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 4ceb401da1bf..fe59ca3e5596 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -117,8 +117,7 @@ # | Schema Data | | # +-----------------------------------------------------------+ -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ################################################################################ # # @@ -195,32 +194,32 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - ip netns add ioam-tmp-node - ip link add name veth0 netns ioam-tmp-node type veth \ - peer name veth1 netns ioam-tmp-node + setup_ns ioam_tmp_node + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node - ip -netns ioam-tmp-node link set veth0 up - ip -netns ioam-tmp-node link set veth1 up + ip -netns $ioam_tmp_node link set veth0 up + ip -netns $ioam_tmp_node link set veth1 up - ip -netns ioam-tmp-node ioam namespace add 0 + ip -netns $ioam_tmp_node ioam namespace add 0 ns_ad=$? - ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" + ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" ns_sh=$? if [[ $ns_ad != 0 || $ns_sh != 0 ]] then echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi - ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ + ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 tr_ad=$? - ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6" + ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" tr_sh=$? if [[ $tr_ad != 0 || $tr_sh != 0 ]] @@ -228,12 +227,12 @@ check_kernel_compatibility() echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ "without CONFIG_IPV6_IOAM6_LWTUNNEL?" ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true lsmod | grep -q "ip6_tunnel" ip6tnl_loaded=$? @@ -265,9 +264,7 @@ cleanup() ip link del ioam-veth-alpha 2>/dev/null || true ip link del ioam-veth-gamma 2>/dev/null || true - ip netns del ioam-node-alpha || true - ip netns del ioam-node-beta || true - ip netns del ioam-node-gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true if [ $ip6tnl_loaded != 0 ] then @@ -277,69 +274,67 @@ cleanup() setup() { - ip netns add ioam-node-alpha - ip netns add ioam-node-beta - ip netns add ioam-node-gamma - - ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \ - peer name ioam-veth-betaL netns ioam-node-beta - ip link add name ioam-veth-betaR netns ioam-node-beta type veth \ - peer name ioam-veth-gamma netns ioam-node-gamma - - ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaR name veth1 - ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0 - - ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 - ip -netns ioam-node-alpha link set veth0 up - ip -netns ioam-node-alpha link set lo up - ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0 - ip -netns ioam-node-alpha route del db01::/64 - ip -netns ioam-node-alpha route add db01::/64 dev veth0 - - ip -netns ioam-node-beta addr add db01::1/64 dev veth0 - ip -netns ioam-node-beta addr add db02::1/64 dev veth1 - ip -netns ioam-node-beta link set veth0 up - ip -netns ioam-node-beta link set veth1 up - ip -netns ioam-node-beta link set lo up - - ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 - ip -netns ioam-node-gamma link set veth0 up - ip -netns ioam-node-gamma link set lo up - ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0 + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + + ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ + peer name ioam-veth-betaL netns $ioam_node_beta + ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ + peer name ioam-veth-gamma netns $ioam_node_gamma + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 + + ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 + ip -netns $ioam_node_alpha link set veth0 up + ip -netns $ioam_node_alpha link set lo up + ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 + ip -netns $ioam_node_alpha route del db01::/64 + ip -netns $ioam_node_alpha route add db01::/64 dev veth0 + + ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 + ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 + ip -netns $ioam_node_beta link set veth0 up + ip -netns $ioam_node_beta link set veth1 up + ip -netns $ioam_node_beta link set lo up + + ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 + ip -netns $ioam_node_gamma link set veth0 up + ip -netns $ioam_node_gamma link set lo up + ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 # - IOAM config - - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} + ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" + ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} + + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} sleep 1 - ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null if [ $? != 0 ] then echo "Setup FAILED" @@ -412,7 +407,7 @@ run() echo # set OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 for t in $TESTS_OUTPUT do @@ -421,8 +416,8 @@ run() done # clean OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo @@ -433,7 +428,7 @@ run() echo # set INPUT settings - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 for t in $TESTS_INPUT do @@ -442,10 +437,10 @@ run() done # clean INPUT settings - ip -netns ioam-node-alpha ioam namespace add 123 \ + ip -netns $ioam_node_alpha ioam namespace add 123 \ data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo printf "%0.s-" {1..74} @@ -488,15 +483,15 @@ out_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0x800000 0 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_no_room() @@ -508,15 +503,15 @@ out_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_bits() @@ -532,11 +527,11 @@ out_bits() bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 &>/dev/null @@ -554,12 +549,12 @@ out_bits() log_test_failed "$descr" fi else - run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 fi done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -573,15 +568,15 @@ out_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 100 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -603,15 +598,15 @@ in_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0x800000 0 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_no_room() @@ -623,15 +618,15 @@ in_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_bits() @@ -647,19 +642,19 @@ in_bits() bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..11} {22..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 - run_test "in_bit$i" "${desc//$i} ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "in_bit$i" "${desc//$i} ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -675,22 +670,22 @@ in_oflag() # Exception: # Here, we need the sender to set the Overflow flag. For that, we will add # back the IOAM namespace that was previously configured on the sender. - ip -netns ioam-node-alpha ioam namespace add 123 + ip -netns $ioam_node_alpha ioam namespace add 123 [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down # And we clean the exception for this test to get things back to normal for # other INPUT tests - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 } in_full_supp_trace() @@ -702,15 +697,15 @@ in_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 80 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -730,15 +725,15 @@ fwd_full_supp_trace() local desc="Forward - Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up - ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ db01::2 db02::2 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } -- cgit v1.2.3 From 4affb17c0d0e3708cd0088e81564af51e7e4d693 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:07 +0800 Subject: selftests/net: convert l2tp.sh to run it in unique namespace Here is the test result after conversion. ]# ./l2tp.sh TEST: IPv4 basic L2TP tunnel [ OK ] TEST: IPv4 route through L2TP tunnel [ OK ] TEST: IPv6 basic L2TP tunnel [ OK ] TEST: IPv6 route through L2TP tunnel [ OK ] TEST: IPv4 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv4 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv6 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv6 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv4 basic L2TP tunnel [ OK ] TEST: IPv4 route through L2TP tunnel [ OK ] TEST: IPv6 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv6 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv4 basic L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv4 route through L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv6 basic L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv6 route through L2TP tunnel - after IPsec teardown [ OK ] Tests passed: 16 Tests failed: 0 Acked-by: David Ahern Reviewed-by: James Chapman Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2tp.sh | 130 +++++++++++++++++------------------- 1 file changed, 62 insertions(+), 68 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh index 5782433886fc..88de7166c8ae 100755 --- a/tools/testing/selftests/net/l2tp.sh +++ b/tools/testing/selftests/net/l2tp.sh @@ -13,6 +13,7 @@ # 10.1.1.1 | | 10.1.2.1 # 2001:db8:1::1 | | 2001:db8:2::1 +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -80,9 +81,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -133,12 +131,7 @@ connect_ns() cleanup() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $host_1 $host_2 $router } setup_l2tp_ipv4() @@ -146,28 +139,28 @@ setup_l2tp_ipv4() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ encap ip local 10.1.1.1 remote 10.1.2.1 - ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \ + ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \ session_id 1041 peer_session_id 1042 - ip -netns host-1 link set dev l2tp4 up - ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 + ip -netns $host_1 link set dev l2tp4 up + ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ encap ip local 10.1.2.1 remote 10.1.1.1 - ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \ + ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \ session_id 1042 peer_session_id 1041 - ip -netns host-2 link set dev l2tp4 up - ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 + ip -netns $host_2 link set dev l2tp4 up + ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 # # add routes to loopback addresses # - ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2 - ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1 + ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2 + ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1 } setup_l2tp_ipv6() @@ -175,28 +168,28 @@ setup_l2tp_ipv6() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ encap ip local 2001:db8:1::1 remote 2001:db8:2::1 - ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \ + ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \ session_id 1061 peer_session_id 1062 - ip -netns host-1 link set dev l2tp6 up - ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 + ip -netns $host_1 link set dev l2tp6 up + ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ encap ip local 2001:db8:2::1 remote 2001:db8:1::1 - ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \ + ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \ session_id 1062 peer_session_id 1061 - ip -netns host-2 link set dev l2tp6 up - ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 + ip -netns $host_2 link set dev l2tp6 up + ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 # # add routes to loopback addresses # - ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2 - ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1 + ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2 + ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1 } setup() @@ -205,21 +198,22 @@ setup() cleanup set -e - create_ns host-1 172.16.101.1/32 fc00:101::1/128 - create_ns host-2 172.16.101.2/32 fc00:101::2/128 - create_ns router + setup_ns host_1 host_2 router + create_ns $host_1 172.16.101.1/32 fc00:101::1/128 + create_ns $host_2 172.16.101.2/32 fc00:101::2/128 + create_ns $router - connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ - router eth1 10.1.1.2/24 2001:db8:1::2/64 + connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ + $router eth1 10.1.1.2/24 2001:db8:1::2/64 - connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ - router eth2 10.1.2.2/24 2001:db8:2::2/64 + connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ + $router eth2 10.1.2.2/24 2001:db8:2::2/64 - ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2 - ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 + ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2 + ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 - ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2 - ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 + ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2 + ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 setup_l2tp_ipv4 setup_l2tp_ipv6 @@ -231,38 +225,38 @@ setup_ipsec() # # IPv4 # - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir out \ tmpl proto esp mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -270,38 +264,38 @@ setup_ipsec() # # IPV6 # - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir out \ tmpl proto esp mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -309,10 +303,10 @@ setup_ipsec() teardown_ipsec() { - run_cmd host-1 ip xfrm state flush - run_cmd host-1 ip xfrm policy flush - run_cmd host-2 ip xfrm state flush - run_cmd host-2 ip xfrm policy flush + run_cmd $host_1 ip xfrm state flush + run_cmd $host_1 ip xfrm policy flush + run_cmd $host_2 ip xfrm state flush + run_cmd $host_2 ip xfrm policy flush } ################################################################################ @@ -322,16 +316,16 @@ run_ping() { local desc="$1" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel ${desc}" } @@ -344,16 +338,16 @@ run_tests() setup_ipsec run_ping "- with IPsec" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec" teardown_ipsec -- cgit v1.2.3 From 3e05fc0c56bbbd1470a80f63b156be52f1101c64 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:08 +0800 Subject: selftests/net: convert ndisc_unsolicited_na_test.sh to run it in unique namespace Here is the test result after conversion. ]# ./ndisc_unsolicited_na_test.sh TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=1 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=0 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=0 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=1 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=0 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=0 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=1 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=1 forwarding=1 [ OK ] Tests passed: 8 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/ndisc_unsolicited_na_test.sh | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh index 86e621b7b9c7..5db69dad0cfc 100755 --- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -10,16 +10,12 @@ # 0 1 0 Don't update NC # 0 1 1 Add a STALE NC entry +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 PAUSE_ON_FAIL=no PAUSE=no -HOST_NS="ns-host" -ROUTER_NS="ns-router" - HOST_INTF="veth-host" ROUTER_INTF="veth-router" @@ -29,11 +25,6 @@ SUBNET_WIDTH=64 ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" -IP_HOST="ip -6 -netns ${HOST_NS}" -IP_HOST_EXEC="ip netns exec ${HOST_NS}" -IP_ROUTER="ip -6 -netns ${ROUTER_NS}" -IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" - tcpdump_stdout= tcpdump_stderr= @@ -76,8 +67,12 @@ setup() # Setup two namespaces and a veth tunnel across them. # On end of the tunnel is a router and the other end is a host. - ip netns add ${HOST_NS} - ip netns add ${ROUTER_NS} + setup_ns HOST_NS ROUTER_NS + IP_HOST="ip -6 -netns ${HOST_NS}" + IP_HOST_EXEC="ip netns exec ${HOST_NS}" + IP_ROUTER="ip -6 -netns ${ROUTER_NS}" + IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ peer name ${HOST_INTF} netns ${HOST_NS} -- cgit v1.2.3 From 90e271f65ee428ae5a75e783f5ba50a10dece09d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:09 +0800 Subject: selftests/net: convert sctp_vrf.sh to run it in unique namespace Here is the test result after conversion. ]# ./sctp_vrf.sh Testing For SCTP VRF: TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y [PASS] ... TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N [PASS] ***v6 Tests Done*** Acked-by: David Ahern Reviewed-by: Xin Long Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/sctp_vrf.sh | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c721e952e5f3..c854034b6aa1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -6,13 +6,11 @@ # SERVER_NS # CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 -CLIENT_NS1="client-ns1" -CLIENT_NS2="client-ns2" +source lib.sh CLIENT_IP4="10.0.0.1" CLIENT_IP6="2000::1" CLIENT_PORT=1234 -SERVER_NS="server-ns" SERVER_IP4="10.0.0.2" SERVER_IP6="2000::2" SERVER_PORT=1234 @@ -20,9 +18,7 @@ SERVER_PORT=1234 setup() { modprobe sctp modprobe sctp_diag - ip netns add $CLIENT_NS1 - ip netns add $CLIENT_NS2 - ip netns add $SERVER_NS + setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null @@ -67,9 +63,7 @@ setup() { cleanup() { ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns del "$CLIENT_NS1" - ip netns del "$CLIENT_NS2" - ip netns del "$SERVER_NS" + cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } wait_server() { -- cgit v1.2.3 From 0f4765d0b48d90ede9788c7edb2e072eee20f88e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:10 +0800 Subject: selftests/net: convert unicast_extensions.sh to run it in unique namespace Here is the test result after conversion. # ./unicast_extensions.sh /usr/bin/which: no nettest in (/root/.local/bin:/root/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin) ########################################################################### Unicast address extensions tests (behavior of reserved IPv4 addresses) ########################################################################### TEST: assign and ping within 240/4 (1 of 2) (is allowed) [ OK ] TEST: assign and ping within 240/4 (2 of 2) (is allowed) [ OK ] TEST: assign and ping within 0/8 (1 of 2) (is allowed) [ OK ] ... TEST: assign and ping class D address (is forbidden) [ OK ] TEST: routing using class D (is forbidden) [ OK ] TEST: routing using 127/8 (is forbidden) [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/unicast_extensions.sh | 99 +++++++++++------------ 1 file changed, 46 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index 2d10ccac898a..b7a2cb9e7477 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -28,8 +28,7 @@ # These tests provide an easy way to flip the expected result of any # of these behaviors for testing kernel patches that change them. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source ./lib.sh # nettest can be run from PATH or from same directory as this selftest if ! which nettest >/dev/null; then @@ -61,20 +60,20 @@ _do_segmenttest(){ # foo --- bar # Arguments: ip_a ip_b prefix_length test_description # - # Caller must set up foo-ns and bar-ns namespaces + # Caller must set up $foo_ns and $bar_ns namespaces # containing linked veth devices foo and bar, # respectively. - ip -n foo-ns address add $1/$3 dev foo || return 1 - ip -n foo-ns link set foo up || return 1 - ip -n bar-ns address add $2/$3 dev bar || return 1 - ip -n bar-ns link set bar up || return 1 + ip -n $foo_ns address add $1/$3 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $bar_ns address add $2/$3 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 - ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 - nettest -B -N bar-ns -O foo-ns -r $1 || return 1 - nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1 return 0 } @@ -88,31 +87,31 @@ _do_route_test(){ # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description # Displays test result and returns success or failure. - # Caller must set up foo-ns, bar-ns, and router-ns + # Caller must set up $foo_ns, $bar_ns, and $router_ns # containing linked veth devices foo-foo1, bar1-bar - # (foo in foo-ns, foo1 and bar1 in router-ns, and - # bar in bar-ns). - - ip -n foo-ns address add $1/$5 dev foo || return 1 - ip -n foo-ns link set foo up || return 1 - ip -n foo-ns route add default via $2 || return 1 - ip -n bar-ns address add $4/$5 dev bar || return 1 - ip -n bar-ns link set bar up || return 1 - ip -n bar-ns route add default via $3 || return 1 - ip -n router-ns address add $2/$5 dev foo1 || return 1 - ip -n router-ns link set foo1 up || return 1 - ip -n router-ns address add $3/$5 dev bar1 || return 1 - ip -n router-ns link set bar1 up || return 1 - - echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward - - ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 - ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 - - nettest -B -N bar-ns -O foo-ns -r $1 || return 1 - nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + # (foo in $foo_ns, foo1 and bar1 in $router_ns, and + # bar in $bar_ns). + + ip -n $foo_ns address add $1/$5 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $foo_ns route add default via $2 || return 1 + ip -n $bar_ns address add $4/$5 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 + ip -n $bar_ns route add default via $3 || return 1 + ip -n $router_ns address add $2/$5 dev foo1 || return 1 + ip -n $router_ns link set foo1 up || return 1 + ip -n $router_ns address add $3/$5 dev bar1 || return 1 + ip -n $router_ns link set bar1 up || return 1 + + echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1 return 0 } @@ -121,17 +120,15 @@ segmenttest(){ # Sets up veth link and tries to connect over it. # Arguments: ip_a ip_b prefix_len test_description hide_output - ip netns add foo-ns - ip netns add bar-ns - ip link add foo netns foo-ns type veth peer name bar netns bar-ns + setup_ns foo_ns bar_ns + ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns test_result=0 _do_segmenttest "$@" || test_result=1 - ip netns pids foo-ns | xargs -r kill -9 - ip netns pids bar-ns | xargs -r kill -9 - ip netns del foo-ns - ip netns del bar-ns + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns show_output # inverted tests will expect failure instead of success @@ -147,21 +144,17 @@ route_test(){ # Returns success or failure. hide_output - ip netns add foo-ns - ip netns add bar-ns - ip netns add router-ns - ip link add foo netns foo-ns type veth peer name foo1 netns router-ns - ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + setup_ns foo_ns bar_ns router_ns + ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns + ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns test_result=0 _do_route_test "$@" || test_result=1 - ip netns pids foo-ns | xargs -r kill -9 - ip netns pids bar-ns | xargs -r kill -9 - ip netns pids router-ns | xargs -r kill -9 - ip netns del foo-ns - ip netns del bar-ns - ip netns del router-ns + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + ip netns pids $router_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns $router_ns show_output -- cgit v1.2.3 From 1b4c7e20bfd6cfe0efbc51756d930a9406d41ea7 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 4 Dec 2023 13:17:22 -0800 Subject: selftests/bpf: Test bpf_kptr_xchg stashing of bpf_rb_root There was some confusion amongst Meta sched_ext folks regarding whether stashing bpf_rb_root - the tree itself, rather than a single node - was supported. This patch adds a small test which demonstrates this functionality: a local kptr with rb_root is created, a node is created and added to the tree, then the tree is kptr_xchg'd into a mapval. Signed-off-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231204211722.571346-1-davemarchevsky@fb.com --- .../selftests/bpf/prog_tests/local_kptr_stash.c | 23 ++++++++++ .../testing/selftests/bpf/progs/local_kptr_stash.c | 53 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index e6e50a394472..827e713f6cf1 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -48,6 +48,27 @@ static void test_local_kptr_stash_plain(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_local_with_root(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -115,6 +136,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_simple(); if (test__start_subtest("local_kptr_stash_plain")) test_local_kptr_stash_plain(); + if (test__start_subtest("local_kptr_stash_local_with_root")) + test_local_kptr_stash_local_with_root(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); if (test__start_subtest("refcount_acquire_without_unstash")) diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 1769fdff6aea..75043ffc5dad 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -37,11 +37,18 @@ struct plain_local { long data; }; +struct local_with_root { + long key; + struct bpf_spin_lock l; + struct bpf_rb_root r __contains(node_data, node); +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; struct plain_local __kptr *plain; + struct local_with_root __kptr *local_root; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -65,6 +72,17 @@ struct { __uint(max_entries, 2); } some_nodes SEC(".maps"); +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return node_a->key < node_b->key; +} + static int create_and_stash(int idx, int val) { struct map_value *mapval; @@ -113,6 +131,41 @@ long stash_plain(void *ctx) return 0; } +SEC("tc") +long stash_local_with_root(void *ctx) +{ + struct local_with_root *res; + struct map_value *mapval; + struct node_data *n; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 2; + res->key = 41; + + n = bpf_obj_new(typeof(*n)); + if (!n) { + bpf_obj_drop(res); + return 3; + } + + bpf_spin_lock(&res->l); + bpf_rbtree_add(&res->r, &n->node, less); + bpf_spin_unlock(&res->l); + + res = bpf_kptr_xchg(&mapval->local_root, res); + if (res) { + bpf_obj_drop(res); + return 4; + } + return 0; +} + SEC("tc") long unstash_rb_node(void *ctx) { -- cgit v1.2.3 From 5ffb260f754bf838507fe0c23d05254b33e2bf3d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 4 Dec 2023 09:44:23 -0800 Subject: selftests/bpf: Make sure we trigger metadata kfuncs for dst 8080 xdp_metadata test is flaky sometimes: verify_xsk_metadata:FAIL:rx_hash_type unexpected rx_hash_type: actual 8 != expected 0 Where 8 means XDP_RSS_TYPE_L4_ANY and is exported from veth driver only when 'skb->l4_hash' condition is met. This makes me think that the program is triggering again for some other packet. Let's have a filter, similar to xdp_hw_metadata, where we trigger XDP kfuncs only for UDP packets destined to port 8080. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231204174423.3460052-1-sdf@google.com --- tools/testing/selftests/bpf/progs/xdp_metadata.c | 31 +++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index d151d406a123..5d6c1245c310 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -27,11 +27,40 @@ extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, SEC("xdp") int rx(struct xdp_md *ctx) { - void *data, *data_meta; + void *data, *data_meta, *data_end; + struct ipv6hdr *ip6h = NULL; + struct ethhdr *eth = NULL; + struct udphdr *udp = NULL; + struct iphdr *iph = NULL; struct xdp_meta *meta; u64 timestamp = -1; int ret; + data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + eth = data; + if (eth + 1 < data_end) { + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP) + udp = (void *)(iph + 1); + } + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP) + udp = (void *)(ip6h + 1); + } + if (udp && udp + 1 > data_end) + udp = NULL; + } + + if (!udp) + return XDP_PASS; + + /* Forwarding UDP:8080 to AF_XDP */ + if (udp->dest != bpf_htons(8080)) + return XDP_PASS; + /* Reserve enough for all custom metadata. */ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); -- cgit v1.2.3 From e2b005d6ec0e738df584190e21d2c7ada37266a0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 4 Dec 2023 10:23:29 -0800 Subject: perf metrics: Avoid segv if default metricgroup isn't set A metric is default by having "Default" within its groups. The default metricgroup name needn't be set and this can result in segv in default_metricgroup_cmp and perf_stat__print_shadow_stats_metricgroup that assume it has a value when there is a Default metric group. To avoid the segv initialize the value to "". Fixes: 1c0e47956a8e ("perf metrics: Sort the Default metricgroup") Signed-off-by: Ian Rogers Reviewed-and-tested-by: Ilkka Koskinen Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: Kajol Jain Cc: Kan Liang Cc: John Garry Cc: stable@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231204182330.654255-1-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0484736d9fe4..ca3e0404f187 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -225,7 +225,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; - m->default_metricgroup_name = pm->default_metricgroup_name; + m->default_metricgroup_name = pm->default_metricgroup_name ?: ""; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); -- cgit v1.2.3 From 90fe70d4e23cb57253d2668a171d5695c332deb7 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:48 -0800 Subject: perf vendor events arm64: AmpereOne: Add missing DefaultMetricgroupName fields AmpereOne metrics were missing DefaultMetricgroupName from metrics with "Default" in group name resulting perf to segfault. Add the missing field to address the issue. Fixes: 59faeaf80d02 ("perf vendor events arm64: Fix for AmpereOne metrics") Signed-off-by: Ilkka Koskinen Reviewed-by: Ian Rogers Cc: James Clark Cc: Will Deacon Cc: Leo Yan Cc: Mike Leach Cc: John Garry Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231201021550.1109196-2-ilkka@os.amperecomputing.com Signed-off-by: Namhyung Kim --- tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json index e2848a9d4848..afcdad58ef89 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json @@ -231,6 +231,7 @@ "MetricName": "slots_lost_misspeculation_fraction", "MetricExpr": "100 * ((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, @@ -238,6 +239,7 @@ "MetricName": "retired_fraction", "MetricExpr": "100 * (OP_RETIRED / (CPU_CYCLES * #slots))", "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", "ScaleUnit": "1percent of slots" }, -- cgit v1.2.3 From f7c0e362a25f99fafa73d62a2e8c3da00cf1fc0e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:17:59 -0800 Subject: tools: ynl: remove generated user space code from git The ynl-generated user space C code is already above 25kLoC and is growing. The initial reason to commit these files was to make reviewing changes to the generator easier. Unfortunately, it has the opposite effect on reviewing changes to specs, and we get far more changes to specs than to the generator. Uncommit those fails, as they are generated on the fly as needed. netdev patchwork now runs a script on each series to create a diff of generated code on the fly, for the rare cases when looking at it is helpful: https://github.com/kuba-moo/nipa/blob/master/tests/series/ynl/ynl.sh Suggested-by: Paolo Abeni Acked-by: Chuck Lever Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/.gitignore | 2 + tools/net/ynl/generated/devlink-user.c | 6864 ------------------------------ tools/net/ynl/generated/devlink-user.h | 5255 ----------------------- tools/net/ynl/generated/ethtool-user.c | 6370 --------------------------- tools/net/ynl/generated/ethtool-user.h | 5535 ------------------------ tools/net/ynl/generated/fou-user.c | 330 -- tools/net/ynl/generated/fou-user.h | 343 -- tools/net/ynl/generated/handshake-user.c | 332 -- tools/net/ynl/generated/handshake-user.h | 145 - tools/net/ynl/generated/netdev-user.c | 952 ----- tools/net/ynl/generated/netdev-user.h | 442 -- tools/net/ynl/generated/nfsd-user.c | 203 - tools/net/ynl/generated/nfsd-user.h | 67 - 13 files changed, 2 insertions(+), 26838 deletions(-) create mode 100644 tools/net/ynl/generated/.gitignore delete mode 100644 tools/net/ynl/generated/devlink-user.c delete mode 100644 tools/net/ynl/generated/devlink-user.h delete mode 100644 tools/net/ynl/generated/ethtool-user.c delete mode 100644 tools/net/ynl/generated/ethtool-user.h delete mode 100644 tools/net/ynl/generated/fou-user.c delete mode 100644 tools/net/ynl/generated/fou-user.h delete mode 100644 tools/net/ynl/generated/handshake-user.c delete mode 100644 tools/net/ynl/generated/handshake-user.h delete mode 100644 tools/net/ynl/generated/netdev-user.c delete mode 100644 tools/net/ynl/generated/netdev-user.h delete mode 100644 tools/net/ynl/generated/nfsd-user.c delete mode 100644 tools/net/ynl/generated/nfsd-user.h (limited to 'tools') diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore new file mode 100644 index 000000000000..ade488626d26 --- /dev/null +++ b/tools/net/ynl/generated/.gitignore @@ -0,0 +1,2 @@ +*-user.c +*-user.h diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c deleted file mode 100644 index 8e757e249dab..000000000000 --- a/tools/net/ynl/generated/devlink-user.c +++ /dev/null @@ -1,6864 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/devlink.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "devlink-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const devlink_op_strmap[] = { - [3] = "get", - // skip "port-get", duplicate reply value - [DEVLINK_CMD_PORT_NEW] = "port-new", - [13] = "sb-get", - [17] = "sb-pool-get", - [21] = "sb-port-pool-get", - [25] = "sb-tc-pool-bind-get", - [DEVLINK_CMD_ESWITCH_GET] = "eswitch-get", - [DEVLINK_CMD_DPIPE_TABLE_GET] = "dpipe-table-get", - [DEVLINK_CMD_DPIPE_ENTRIES_GET] = "dpipe-entries-get", - [DEVLINK_CMD_DPIPE_HEADERS_GET] = "dpipe-headers-get", - [DEVLINK_CMD_RESOURCE_DUMP] = "resource-dump", - [DEVLINK_CMD_RELOAD] = "reload", - [DEVLINK_CMD_PARAM_GET] = "param-get", - [DEVLINK_CMD_REGION_GET] = "region-get", - [DEVLINK_CMD_REGION_NEW] = "region-new", - [DEVLINK_CMD_REGION_READ] = "region-read", - [DEVLINK_CMD_PORT_PARAM_GET] = "port-param-get", - [DEVLINK_CMD_INFO_GET] = "info-get", - [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET] = "health-reporter-dump-get", - [63] = "trap-get", - [67] = "trap-group-get", - [71] = "trap-policer-get", - [76] = "rate-get", - [80] = "linecard-get", - [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", -}; - -const char *devlink_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(devlink_op_strmap)) - return NULL; - return devlink_op_strmap[op]; -} - -static const char * const devlink_sb_pool_type_strmap[] = { - [0] = "ingress", - [1] = "egress", -}; - -const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_sb_pool_type_strmap)) - return NULL; - return devlink_sb_pool_type_strmap[value]; -} - -static const char * const devlink_port_type_strmap[] = { - [0] = "notset", - [1] = "auto", - [2] = "eth", - [3] = "ib", -}; - -const char *devlink_port_type_str(enum devlink_port_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_type_strmap)) - return NULL; - return devlink_port_type_strmap[value]; -} - -static const char * const devlink_port_flavour_strmap[] = { - [0] = "physical", - [1] = "cpu", - [2] = "dsa", - [3] = "pci_pf", - [4] = "pci_vf", - [5] = "virtual", - [6] = "unused", - [7] = "pci_sf", -}; - -const char *devlink_port_flavour_str(enum devlink_port_flavour value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_flavour_strmap)) - return NULL; - return devlink_port_flavour_strmap[value]; -} - -static const char * const devlink_port_fn_state_strmap[] = { - [0] = "inactive", - [1] = "active", -}; - -const char *devlink_port_fn_state_str(enum devlink_port_fn_state value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_state_strmap)) - return NULL; - return devlink_port_fn_state_strmap[value]; -} - -static const char * const devlink_port_fn_opstate_strmap[] = { - [0] = "detached", - [1] = "attached", -}; - -const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_opstate_strmap)) - return NULL; - return devlink_port_fn_opstate_strmap[value]; -} - -static const char * const devlink_port_fn_attr_cap_strmap[] = { - [0] = "roce-bit", - [1] = "migratable-bit", - [2] = "ipsec-crypto-bit", - [3] = "ipsec-packet-bit", -}; - -const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_attr_cap_strmap)) - return NULL; - return devlink_port_fn_attr_cap_strmap[value]; -} - -static const char * const devlink_sb_threshold_type_strmap[] = { - [0] = "static", - [1] = "dynamic", -}; - -const char *devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_sb_threshold_type_strmap)) - return NULL; - return devlink_sb_threshold_type_strmap[value]; -} - -static const char * const devlink_eswitch_mode_strmap[] = { - [0] = "legacy", - [1] = "switchdev", -}; - -const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_mode_strmap)) - return NULL; - return devlink_eswitch_mode_strmap[value]; -} - -static const char * const devlink_eswitch_inline_mode_strmap[] = { - [0] = "none", - [1] = "link", - [2] = "network", - [3] = "transport", -}; - -const char * -devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_inline_mode_strmap)) - return NULL; - return devlink_eswitch_inline_mode_strmap[value]; -} - -static const char * const devlink_eswitch_encap_mode_strmap[] = { - [0] = "none", - [1] = "basic", -}; - -const char * -devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_encap_mode_strmap)) - return NULL; - return devlink_eswitch_encap_mode_strmap[value]; -} - -static const char * const devlink_dpipe_match_type_strmap[] = { - [0] = "field-exact", -}; - -const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_match_type_strmap)) - return NULL; - return devlink_dpipe_match_type_strmap[value]; -} - -static const char * const devlink_dpipe_action_type_strmap[] = { - [0] = "field-modify", -}; - -const char *devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_action_type_strmap)) - return NULL; - return devlink_dpipe_action_type_strmap[value]; -} - -static const char * const devlink_dpipe_field_mapping_type_strmap[] = { - [0] = "none", - [1] = "ifindex", -}; - -const char * -devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_field_mapping_type_strmap)) - return NULL; - return devlink_dpipe_field_mapping_type_strmap[value]; -} - -static const char * const devlink_resource_unit_strmap[] = { - [0] = "entry", -}; - -const char *devlink_resource_unit_str(enum devlink_resource_unit value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_resource_unit_strmap)) - return NULL; - return devlink_resource_unit_strmap[value]; -} - -static const char * const devlink_reload_action_strmap[] = { - [1] = "driver-reinit", - [2] = "fw-activate", -}; - -const char *devlink_reload_action_str(enum devlink_reload_action value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_reload_action_strmap)) - return NULL; - return devlink_reload_action_strmap[value]; -} - -static const char * const devlink_param_cmode_strmap[] = { - [0] = "runtime", - [1] = "driverinit", - [2] = "permanent", -}; - -const char *devlink_param_cmode_str(enum devlink_param_cmode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_param_cmode_strmap)) - return NULL; - return devlink_param_cmode_strmap[value]; -} - -static const char * const devlink_flash_overwrite_strmap[] = { - [0] = "settings-bit", - [1] = "identifiers-bit", -}; - -const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_flash_overwrite_strmap)) - return NULL; - return devlink_flash_overwrite_strmap[value]; -} - -static const char * const devlink_trap_action_strmap[] = { - [0] = "drop", - [1] = "trap", - [2] = "mirror", -}; - -const char *devlink_trap_action_str(enum devlink_trap_action value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_trap_action_strmap)) - return NULL; - return devlink_trap_action_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr devlink_dl_dpipe_match_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_match_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_match_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_match_value_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_match_value_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_match_value_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_action_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_action_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_action_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_action_value_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_action_value_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_action_value_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_field_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_field_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_field_policy, -}; - -struct ynl_policy_attr devlink_dl_resource_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_resource_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_resource_policy, -}; - -struct ynl_policy_attr devlink_dl_info_version_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest devlink_dl_info_version_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_info_version_policy, -}; - -struct ynl_policy_attr devlink_dl_fmsg_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest devlink_dl_fmsg_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_fmsg_policy, -}; - -struct ynl_policy_attr devlink_dl_port_function_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { - [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .name = "hw-addr", .type = YNL_PT_BINARY,}, - [DEVLINK_PORT_FN_ATTR_STATE] = { .name = "state", .type = YNL_PT_U8, }, - [DEVLINK_PORT_FN_ATTR_OPSTATE] = { .name = "opstate", .type = YNL_PT_U8, }, - [DEVLINK_PORT_FN_ATTR_CAPS] = { .name = "caps", .type = YNL_PT_BITFIELD32, }, -}; - -struct ynl_policy_nest devlink_dl_port_function_nest = { - .max_attr = DEVLINK_PORT_FUNCTION_ATTR_MAX, - .table = devlink_dl_port_function_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_stats_entry_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_reload_stats_entry_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_stats_entry_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_act_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_act_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_act_stats_policy, -}; - -struct ynl_policy_attr devlink_dl_selftest_id_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { - [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .name = "flash", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest devlink_dl_selftest_id_nest = { - .max_attr = DEVLINK_ATTR_SELFTEST_ID_MAX, - .table = devlink_dl_selftest_id_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_matches_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_matches_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_matches_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_actions_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_actions_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_actions_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_match_values_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_match_values_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_match_values_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_action_values_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_action_values_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_action_values_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_header_fields_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_header_fields_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_header_fields_policy, -}; - -struct ynl_policy_attr devlink_dl_resource_list_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, }, -}; - -struct ynl_policy_nest devlink_dl_resource_list_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_resource_list_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_act_info_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_act_info_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_act_info_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = "dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = "dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_header_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_header_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_header_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_stats_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_tables_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_tables_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_tables_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entries_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entries_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entries_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_headers_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_headers_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_headers_policy, -}; - -struct ynl_policy_attr devlink_dl_dev_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dev_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dev_stats_policy, -}; - -struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_BUS_NAME] = { .name = "bus-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PORT_INDEX] = { .name = "port-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_PORT_TYPE] = { .name = "port-type", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .name = "port-split-count", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_INDEX] = { .name = "sb-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_POOL_INDEX] = { .name = "sb-pool-index", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_SB_POOL_TYPE] = { .name = "sb-pool-type", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_SB_POOL_SIZE] = { .name = "sb-pool-size", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .name = "sb-pool-threshold-type", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_SB_THRESHOLD] = { .name = "sb-threshold", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_TC_INDEX] = { .name = "sb-tc-index", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_ESWITCH_MODE] = { .name = "eswitch-mode", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .name = "eswitch-inline-mode", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_DPIPE_TABLES] = { .name = "dpipe-tables", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_tables_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = "dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_ENTRIES] = { .name = "dpipe-entries", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entries_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = "dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, - [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, - [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, - [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, - [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADERS] = { .name = "dpipe-headers", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_headers_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, - [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .name = "eswitch-encap-mode", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_LIST] = { .name = "resource-list", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_list_nest, }, - [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, }, - [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_PORT_FLAVOUR] = { .name = "port-flavour", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_PARAM_NAME] = { .name = "param-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PARAM_TYPE] = { .name = "param-type", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .name = "param-value-cmode", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_REGION_NAME] = { .name = "region-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .name = "region-snapshot-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .name = "region-chunk-addr", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .name = "region-chunk-len", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .name = "info-driver-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .name = "info-serial-number", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .name = "info-version-fixed", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_RUNNING] = { .name = "info-version-running", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_STORED] = { .name = "info-version-stored", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_FMSG] = { .name = "fmsg", .type = YNL_PT_NEST, .nest = &devlink_dl_fmsg_nest, }, - [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .name = "health-reporter-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .name = "health-reporter-graceful-period", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .name = "health-reporter-auto-recover", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .name = "flash-update-file-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .name = "flash-update-component", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .name = "port-pci-pf-number", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_TRAP_NAME] = { .name = "trap-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_TRAP_ACTION] = { .name = "trap-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .name = "trap-group-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RELOAD_FAILED] = { .name = "reload-failed", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_NETNS_FD] = { .name = "netns-fd", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_NETNS_PID] = { .name = "netns-pid", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_NETNS_ID] = { .name = "netns-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .name = "health-reporter-auto-dump", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_TRAP_POLICER_ID] = { .name = "trap-policer-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .name = "trap-policer-rate", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .name = "trap-policer-burst", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_PORT_FUNCTION] = { .name = "port-function", .type = YNL_PT_NEST, .nest = &devlink_dl_port_function_nest, }, - [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .name = "port-controller-number", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = { .name = "flash-update-overwrite-mask", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED] = { .name = "reload-actions-performed", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_RELOAD_LIMITS] = { .name = "reload-limits", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_DEV_STATS] = { .name = "dev-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_dev_stats_nest, }, - [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, - [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, - [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, - [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .name = "port-pci-sf-number", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_RATE_TX_SHARE] = { .name = "rate-tx-share", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RATE_TX_MAX] = { .name = "rate-tx-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RATE_NODE_NAME] = { .name = "rate-node-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .name = "rate-parent-node-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_LINECARD_INDEX] = { .name = "linecard-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_LINECARD_TYPE] = { .name = "linecard-type", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_SELFTESTS] = { .name = "selftests", .type = YNL_PT_NEST, .nest = &devlink_dl_selftest_id_nest, }, - [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .name = "rate-tx-priority", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .name = "rate-tx-weight", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REGION_DIRECT] = { .name = "region-direct", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest devlink_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_policy, -}; - -/* Common nested types */ -void devlink_dl_dpipe_match_free(struct devlink_dl_dpipe_match *obj) -{ -} - -int devlink_dl_dpipe_match_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_match *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_match_type = 1; - dst->dpipe_match_type = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_index = 1; - dst->dpipe_header_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void -devlink_dl_dpipe_match_value_free(struct devlink_dl_dpipe_match_value *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match; i++) - devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); - free(obj->dpipe_match); - free(obj->dpipe_value); - free(obj->dpipe_value_mask); -} - -int devlink_dl_dpipe_match_value_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_match_value *dst = yarg->data; - unsigned int n_dpipe_match = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-match-value.dpipe-match)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH) { - n_dpipe_match++; - } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_len = len; - dst->dpipe_value = malloc(len); - memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_mask_len = len; - dst->dpipe_value_mask = malloc(len); - memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_value_mapping = 1; - dst->dpipe_value_mapping = mnl_attr_get_u32(attr); - } - } - - if (n_dpipe_match) { - dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); - dst->n_dpipe_match = n_dpipe_match; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { - parg.data = &dst->dpipe_match[i]; - if (devlink_dl_dpipe_match_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_action_free(struct devlink_dl_dpipe_action *obj) -{ -} - -int devlink_dl_dpipe_action_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_action *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_action_type = 1; - dst->dpipe_action_type = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_index = 1; - dst->dpipe_header_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void -devlink_dl_dpipe_action_value_free(struct devlink_dl_dpipe_action_value *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action; i++) - devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); - free(obj->dpipe_action); - free(obj->dpipe_value); - free(obj->dpipe_value_mask); -} - -int devlink_dl_dpipe_action_value_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_action_value *dst = yarg->data; - unsigned int n_dpipe_action = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-action-value.dpipe-action)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION) { - n_dpipe_action++; - } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_len = len; - dst->dpipe_value = malloc(len); - memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_mask_len = len; - dst->dpipe_value_mask = malloc(len); - memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_value_mapping = 1; - dst->dpipe_value_mapping = mnl_attr_get_u32(attr); - } - } - - if (n_dpipe_action) { - dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); - dst->n_dpipe_action = n_dpipe_action; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { - parg.data = &dst->dpipe_action[i]; - if (devlink_dl_dpipe_action_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_field_free(struct devlink_dl_dpipe_field *obj) -{ - free(obj->dpipe_field_name); -} - -int devlink_dl_dpipe_field_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_field *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_FIELD_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_field_name_len = len; - dst->dpipe_field_name = malloc(len + 1); - memcpy(dst->dpipe_field_name, mnl_attr_get_str(attr), len); - dst->dpipe_field_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_bitwidth = 1; - dst->dpipe_field_bitwidth = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_mapping_type = 1; - dst->dpipe_field_mapping_type = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void devlink_dl_resource_free(struct devlink_dl_resource *obj) -{ - free(obj->resource_name); -} - -int devlink_dl_resource_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_resource *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RESOURCE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.resource_name_len = len; - dst->resource_name = malloc(len + 1); - memcpy(dst->resource_name, mnl_attr_get_str(attr), len); - dst->resource_name[len] = 0; - } else if (type == DEVLINK_ATTR_RESOURCE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_id = 1; - dst->resource_id = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size = 1; - dst->resource_size = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_NEW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_new = 1; - dst->resource_size_new = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_VALID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_valid = 1; - dst->resource_size_valid = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MIN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_min = 1; - dst->resource_size_min = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_max = 1; - dst->resource_size_max = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_GRAN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_gran = 1; - dst->resource_size_gran = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_UNIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_unit = 1; - dst->resource_unit = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_OCC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_occ = 1; - dst->resource_occ = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_info_version_free(struct devlink_dl_info_version *obj) -{ - free(obj->info_version_name); - free(obj->info_version_value); -} - -int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_info_version *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_INFO_VERSION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_name_len = len; - dst->info_version_name = malloc(len + 1); - memcpy(dst->info_version_name, mnl_attr_get_str(attr), len); - dst->info_version_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_value_len = len; - dst->info_version_value = malloc(len + 1); - memcpy(dst->info_version_value, mnl_attr_get_str(attr), len); - dst->info_version_value[len] = 0; - } - } - - return 0; -} - -void devlink_dl_fmsg_free(struct devlink_dl_fmsg *obj) -{ - free(obj->fmsg_obj_name); -} - -int devlink_dl_fmsg_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_fmsg *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_FMSG_OBJ_NEST_START) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_obj_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_PAIR_NEST_START) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_pair_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_ARR_NEST_START) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_arr_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_NEST_END) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_nest_end = 1; - } else if (type == DEVLINK_ATTR_FMSG_OBJ_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.fmsg_obj_name_len = len; - dst->fmsg_obj_name = malloc(len + 1); - memcpy(dst->fmsg_obj_name, mnl_attr_get_str(attr), len); - dst->fmsg_obj_name[len] = 0; - } - } - - return 0; -} - -void devlink_dl_port_function_free(struct devlink_dl_port_function *obj) -{ - free(obj->hw_addr); -} - -int devlink_dl_port_function_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct devlink_dl_port_function *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.hw_addr_len) - mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, obj->_present.hw_addr_len, obj->hw_addr); - if (obj->_present.state) - mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_STATE, obj->state); - if (obj->_present.opstate) - mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_OPSTATE, obj->opstate); - if (obj->_present.caps) - mnl_attr_put(nlh, DEVLINK_PORT_FN_ATTR_CAPS, sizeof(struct nla_bitfield32), &obj->caps); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -void -devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj) -{ -} - -int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_stats_entry *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS_LIMIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats_limit = 1; - dst->reload_stats_limit = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats_value = 1; - dst->reload_stats_value = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_stats_entry; i++) - devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]); - free(obj->reload_stats_entry); -} - -int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_act_stats *dst = yarg->data; - unsigned int n_reload_stats_entry = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_stats_entry) - return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - n_reload_stats_entry++; - } - } - - if (n_reload_stats_entry) { - dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry)); - dst->n_reload_stats_entry = n_reload_stats_entry; - i = 0; - parg.rsp_policy = &devlink_dl_reload_stats_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - parg.data = &dst->reload_stats_entry[i]; - if (devlink_dl_reload_stats_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_selftest_id_free(struct devlink_dl_selftest_id *obj) -{ -} - -int devlink_dl_selftest_id_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct devlink_dl_selftest_id *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.flash) - mnl_attr_put(nlh, DEVLINK_ATTR_SELFTEST_ID_FLASH, 0, NULL); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -void -devlink_dl_dpipe_table_matches_free(struct devlink_dl_dpipe_table_matches *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match; i++) - devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); - free(obj->dpipe_match); -} - -int devlink_dl_dpipe_table_matches_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table_matches *dst = yarg->data; - unsigned int n_dpipe_match = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-matches.dpipe-match)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH) { - n_dpipe_match++; - } - } - - if (n_dpipe_match) { - dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); - dst->n_dpipe_match = n_dpipe_match; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { - parg.data = &dst->dpipe_match[i]; - if (devlink_dl_dpipe_match_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_table_actions_free(struct devlink_dl_dpipe_table_actions *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action; i++) - devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); - free(obj->dpipe_action); -} - -int devlink_dl_dpipe_table_actions_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table_actions *dst = yarg->data; - unsigned int n_dpipe_action = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-actions.dpipe-action)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION) { - n_dpipe_action++; - } - } - - if (n_dpipe_action) { - dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); - dst->n_dpipe_action = n_dpipe_action; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { - parg.data = &dst->dpipe_action[i]; - if (devlink_dl_dpipe_action_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_entry_match_values_free(struct devlink_dl_dpipe_entry_match_values *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match_value; i++) - devlink_dl_dpipe_match_value_free(&obj->dpipe_match_value[i]); - free(obj->dpipe_match_value); -} - -int devlink_dl_dpipe_entry_match_values_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry_match_values *dst = yarg->data; - unsigned int n_dpipe_match_value = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match_value) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-match-values.dpipe-match-value)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { - n_dpipe_match_value++; - } - } - - if (n_dpipe_match_value) { - dst->dpipe_match_value = calloc(n_dpipe_match_value, sizeof(*dst->dpipe_match_value)); - dst->n_dpipe_match_value = n_dpipe_match_value; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_value_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { - parg.data = &dst->dpipe_match_value[i]; - if (devlink_dl_dpipe_match_value_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_entry_action_values_free(struct devlink_dl_dpipe_entry_action_values *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action_value; i++) - devlink_dl_dpipe_action_value_free(&obj->dpipe_action_value[i]); - free(obj->dpipe_action_value); -} - -int devlink_dl_dpipe_entry_action_values_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry_action_values *dst = yarg->data; - unsigned int n_dpipe_action_value = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action_value) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-action-values.dpipe-action-value)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { - n_dpipe_action_value++; - } - } - - if (n_dpipe_action_value) { - dst->dpipe_action_value = calloc(n_dpipe_action_value, sizeof(*dst->dpipe_action_value)); - dst->n_dpipe_action_value = n_dpipe_action_value; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_value_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { - parg.data = &dst->dpipe_action_value[i]; - if (devlink_dl_dpipe_action_value_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_header_fields_free(struct devlink_dl_dpipe_header_fields *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_field; i++) - devlink_dl_dpipe_field_free(&obj->dpipe_field[i]); - free(obj->dpipe_field); -} - -int devlink_dl_dpipe_header_fields_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_header_fields *dst = yarg->data; - unsigned int n_dpipe_field = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_field) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-header-fields.dpipe-field)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_FIELD) { - n_dpipe_field++; - } - } - - if (n_dpipe_field) { - dst->dpipe_field = calloc(n_dpipe_field, sizeof(*dst->dpipe_field)); - dst->n_dpipe_field = n_dpipe_field; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_field_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_FIELD) { - parg.data = &dst->dpipe_field[i]; - if (devlink_dl_dpipe_field_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_resource_list_free(struct devlink_dl_resource_list *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_resource; i++) - devlink_dl_resource_free(&obj->resource[i]); - free(obj->resource); -} - -int devlink_dl_resource_list_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_resource_list *dst = yarg->data; - unsigned int n_resource = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->resource) - return ynl_error_parse(yarg, "attribute already present (dl-resource-list.resource)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RESOURCE) { - n_resource++; - } - } - - if (n_resource) { - dst->resource = calloc(n_resource, sizeof(*dst->resource)); - dst->n_resource = n_resource; - i = 0; - parg.rsp_policy = &devlink_dl_resource_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RESOURCE) { - parg.data = &dst->resource[i]; - if (devlink_dl_resource_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_action_stats; i++) - devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]); - free(obj->reload_action_stats); -} - -int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_act_info *dst = yarg->data; - unsigned int n_reload_action_stats = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_action_stats) - return ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_ACTION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_action = 1; - dst->reload_action = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - n_reload_action_stats++; - } - } - - if (n_reload_action_stats) { - dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats)); - dst->n_reload_action_stats = n_reload_action_stats; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_stats_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - parg.data = &dst->reload_action_stats[i]; - if (devlink_dl_reload_act_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_table_free(struct devlink_dl_dpipe_table *obj) -{ - free(obj->dpipe_table_name); - devlink_dl_dpipe_table_matches_free(&obj->dpipe_table_matches); - devlink_dl_dpipe_table_actions_free(&obj->dpipe_table_actions); -} - -int devlink_dl_dpipe_table_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_TABLE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_table_name_len = len; - dst->dpipe_table_name = malloc(len + 1); - memcpy(dst->dpipe_table_name, mnl_attr_get_str(attr), len); - dst->dpipe_table_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_size = 1; - dst->dpipe_table_size = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_MATCHES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_matches = 1; - - parg.rsp_policy = &devlink_dl_dpipe_table_matches_nest; - parg.data = &dst->dpipe_table_matches; - if (devlink_dl_dpipe_table_matches_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_ACTIONS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_actions = 1; - - parg.rsp_policy = &devlink_dl_dpipe_table_actions_nest; - parg.data = &dst->dpipe_table_actions; - if (devlink_dl_dpipe_table_actions_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_counters_enabled = 1; - dst->dpipe_table_counters_enabled = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_resource_id = 1; - dst->dpipe_table_resource_id = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_resource_units = 1; - dst->dpipe_table_resource_units = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_dpipe_entry_free(struct devlink_dl_dpipe_entry *obj) -{ - devlink_dl_dpipe_entry_match_values_free(&obj->dpipe_entry_match_values); - devlink_dl_dpipe_entry_action_values_free(&obj->dpipe_entry_action_values); -} - -int devlink_dl_dpipe_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ENTRY_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_index = 1; - dst->dpipe_entry_index = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_match_values = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entry_match_values_nest; - parg.data = &dst->dpipe_entry_match_values; - if (devlink_dl_dpipe_entry_match_values_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_action_values = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entry_action_values_nest; - parg.data = &dst->dpipe_entry_action_values; - if (devlink_dl_dpipe_entry_action_values_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_COUNTER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_counter = 1; - dst->dpipe_entry_counter = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_dpipe_header_free(struct devlink_dl_dpipe_header *obj) -{ - free(obj->dpipe_header_name); - devlink_dl_dpipe_header_fields_free(&obj->dpipe_header_fields); -} - -int devlink_dl_dpipe_header_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_header *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_HEADER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_header_name_len = len; - dst->dpipe_header_name = malloc(len + 1); - memcpy(dst->dpipe_header_name, mnl_attr_get_str(attr), len); - dst->dpipe_header_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_FIELDS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_fields = 1; - - parg.rsp_policy = &devlink_dl_dpipe_header_fields_nest; - parg.data = &dst->dpipe_header_fields; - if (devlink_dl_dpipe_header_fields_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_action_info; i++) - devlink_dl_reload_act_info_free(&obj->reload_action_info[i]); - free(obj->reload_action_info); -} - -int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_stats *dst = yarg->data; - unsigned int n_reload_action_info = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_action_info) - return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - n_reload_action_info++; - } - } - - if (n_reload_action_info) { - dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info)); - dst->n_reload_action_info = n_reload_action_info; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_info_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - parg.data = &dst->reload_action_info[i]; - if (devlink_dl_reload_act_info_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_tables_free(struct devlink_dl_dpipe_tables *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_table; i++) - devlink_dl_dpipe_table_free(&obj->dpipe_table[i]); - free(obj->dpipe_table); -} - -int devlink_dl_dpipe_tables_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_tables *dst = yarg->data; - unsigned int n_dpipe_table = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_table) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-tables.dpipe-table)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_TABLE) { - n_dpipe_table++; - } - } - - if (n_dpipe_table) { - dst->dpipe_table = calloc(n_dpipe_table, sizeof(*dst->dpipe_table)); - dst->n_dpipe_table = n_dpipe_table; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_table_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_TABLE) { - parg.data = &dst->dpipe_table[i]; - if (devlink_dl_dpipe_table_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_entries_free(struct devlink_dl_dpipe_entries *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_entry; i++) - devlink_dl_dpipe_entry_free(&obj->dpipe_entry[i]); - free(obj->dpipe_entry); -} - -int devlink_dl_dpipe_entries_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entries *dst = yarg->data; - unsigned int n_dpipe_entry = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_entry) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entries.dpipe-entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ENTRY) { - n_dpipe_entry++; - } - } - - if (n_dpipe_entry) { - dst->dpipe_entry = calloc(n_dpipe_entry, sizeof(*dst->dpipe_entry)); - dst->n_dpipe_entry = n_dpipe_entry; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ENTRY) { - parg.data = &dst->dpipe_entry[i]; - if (devlink_dl_dpipe_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_headers_free(struct devlink_dl_dpipe_headers *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_header; i++) - devlink_dl_dpipe_header_free(&obj->dpipe_header[i]); - free(obj->dpipe_header); -} - -int devlink_dl_dpipe_headers_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_headers *dst = yarg->data; - unsigned int n_dpipe_header = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_header) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-headers.dpipe-header)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_HEADER) { - n_dpipe_header++; - } - } - - if (n_dpipe_header) { - dst->dpipe_header = calloc(n_dpipe_header, sizeof(*dst->dpipe_header)); - dst->n_dpipe_header = n_dpipe_header; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_header_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_HEADER) { - parg.data = &dst->dpipe_header[i]; - if (devlink_dl_dpipe_header_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj) -{ - devlink_dl_reload_stats_free(&obj->reload_stats); - devlink_dl_reload_stats_free(&obj->remote_reload_stats); -} - -int devlink_dl_dev_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dev_stats *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats = 1; - - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.remote_reload_stats = 1; - - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->remote_reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -/* ============== DEVLINK_CMD_GET ============== */ -/* DEVLINK_CMD_GET - do */ -void devlink_get_req_free(struct devlink_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_get_rsp_free(struct devlink_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dev_stats_free(&rsp->dev_stats); - free(rsp); -} - -int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RELOAD_FAILED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_failed = 1; - dst->reload_failed = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DEV_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dev_stats = 1; - - parg.rsp_policy = &devlink_dl_dev_stats_nest; - parg.data = &dst->dev_stats; - if (devlink_dl_dev_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_get_rsp * -devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_get_rsp_parse; - yrs.rsp_cmd = 3; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_GET - dump */ -void devlink_get_list_free(struct devlink_get_list *rsp) -{ - struct devlink_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - devlink_dl_dev_stats_free(&rsp->obj.dev_stats); - free(rsp); - } -} - -struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_get_list); - yds.cb = devlink_get_rsp_parse; - yds.rsp_cmd = 3; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_GET ============== */ -/* DEVLINK_CMD_PORT_GET - do */ -void devlink_port_get_req_free(struct devlink_port_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_port_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_port_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_port_get_rsp * -devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_port_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_port_get_rsp_parse; - yrs.rsp_cmd = 7; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_port_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PORT_GET - dump */ -int devlink_port_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_port_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp) -{ - struct devlink_port_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_port_get_rsp_list * -devlink_port_get_dump(struct ynl_sock *ys, - struct devlink_port_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_port_get_rsp_list); - yds.cb = devlink_port_get_rsp_dump_parse; - yds.rsp_cmd = 7; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_port_get_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_SET ============== */ -/* DEVLINK_CMD_PORT_SET - do */ -void devlink_port_set_req_free(struct devlink_port_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - devlink_dl_port_function_free(&req->port_function); - free(req); -} - -int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_type) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_TYPE, req->port_type); - if (req->_present.port_function) - devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_PORT_NEW ============== */ -/* DEVLINK_CMD_PORT_NEW - do */ -void devlink_port_new_req_free(struct devlink_port_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_port_new_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_port_new_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_port_new_rsp * -devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_port_new_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_NEW, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_flavour) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_FLAVOUR, req->port_flavour); - if (req->_present.port_pci_pf_number) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, req->port_pci_pf_number); - if (req->_present.port_pci_sf_number) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, req->port_pci_sf_number); - if (req->_present.port_controller_number) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, req->port_controller_number); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_port_new_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PORT_NEW; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_port_new_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_DEL ============== */ -/* DEVLINK_CMD_PORT_DEL - do */ -void devlink_port_del_req_free(struct devlink_port_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_DEL, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ -/* DEVLINK_CMD_PORT_SPLIT - do */ -void devlink_port_split_req_free(struct devlink_port_split_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SPLIT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_split_count) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ -/* DEVLINK_CMD_PORT_UNSPLIT - do */ -void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_unsplit(struct ynl_sock *ys, - struct devlink_port_unsplit_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_UNSPLIT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -void devlink_sb_get_req_free(struct devlink_sb_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_sb_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = 13; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_GET - dump */ -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp) -{ - struct devlink_sb_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_get_list); - yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = 13; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_sb_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = 17; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_pool_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_POOL_GET - dump */ -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp) -{ - struct devlink_sb_pool_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); - yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = 17; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_pool_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ -/* DEVLINK_CMD_SB_POOL_SET - do */ -void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_pool_set(struct ynl_sock *ys, - struct devlink_sb_pool_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_pool_threshold_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, req->sb_pool_threshold_type); - if (req->_present.sb_pool_size) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_port_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_sb_port_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_port_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = 21; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_port_pool_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp) -{ - struct devlink_sb_port_pool_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); - yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = 21; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_port_pool_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -void -devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_port_pool_set(struct ynl_sock *ys, - struct devlink_sb_port_pool_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_threshold) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_tc_pool_bind_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_sb_tc_pool_bind_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_type = 1; - dst->sb_pool_type = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_SB_TC_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_tc_index = 1; - dst->sb_tc_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_tc_pool_bind_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); - if (req->_present.sb_tc_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = 25; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_tc_pool_bind_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp) -{ - struct devlink_sb_tc_pool_bind_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); - yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = 25; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_tc_pool_bind_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -void -devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_pool_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); - if (req->_present.sb_tc_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); - if (req->_present.sb_threshold) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ -/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_occ_snapshot(struct ynl_sock *ys, - struct devlink_sb_occ_snapshot_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_SNAPSHOT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ -/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -void -devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_occ_max_clear(struct ynl_sock *ys, - struct devlink_sb_occ_max_clear_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_MAX_CLEAR, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ -/* DEVLINK_CMD_ESWITCH_GET - do */ -void devlink_eswitch_get_req_free(struct devlink_eswitch_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_eswitch_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_eswitch_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_ESWITCH_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_mode = 1; - dst->eswitch_mode = mnl_attr_get_u16(attr); - } else if (type == DEVLINK_ATTR_ESWITCH_INLINE_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_inline_mode = 1; - dst->eswitch_inline_mode = mnl_attr_get_u16(attr); - } else if (type == DEVLINK_ATTR_ESWITCH_ENCAP_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_encap_mode = 1; - dst->eswitch_encap_mode = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_eswitch_get_rsp * -devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_eswitch_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_eswitch_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_ESWITCH_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_eswitch_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ -/* DEVLINK_CMD_ESWITCH_SET - do */ -void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_eswitch_set(struct ynl_sock *ys, - struct devlink_eswitch_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.eswitch_mode) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_MODE, req->eswitch_mode); - if (req->_present.eswitch_inline_mode) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_INLINE_MODE, req->eswitch_inline_mode); - if (req->_present.eswitch_encap_mode) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dpipe_tables_free(&rsp->dpipe_tables); - free(rsp); -} - -int devlink_dpipe_table_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_table_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_TABLES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_tables = 1; - - parg.rsp_policy = &devlink_dl_dpipe_tables_nest; - parg.data = &dst->dpipe_tables; - if (devlink_dl_dpipe_tables_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_table_get_rsp * -devlink_dpipe_table_get(struct ynl_sock *ys, - struct devlink_dpipe_table_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_table_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_table_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_DPIPE_TABLE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_table_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ -/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -void -devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -void -devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dpipe_entries_free(&rsp->dpipe_entries); - free(rsp); -} - -int devlink_dpipe_entries_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_entries_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRIES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entries = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entries_nest; - parg.data = &dst->dpipe_entries; - if (devlink_dl_dpipe_entries_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_entries_get_rsp * -devlink_dpipe_entries_get(struct ynl_sock *ys, - struct devlink_dpipe_entries_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_entries_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_ENTRIES_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_entries_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_entries_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ -/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -void -devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dpipe_headers_free(&rsp->dpipe_headers); - free(rsp); -} - -int devlink_dpipe_headers_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_headers_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_HEADERS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_headers = 1; - - parg.rsp_policy = &devlink_dl_dpipe_headers_nest; - parg.data = &dst->dpipe_headers; - if (devlink_dl_dpipe_headers_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_headers_get_rsp * -devlink_dpipe_headers_get(struct ynl_sock *ys, - struct devlink_dpipe_headers_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_headers_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_HEADERS_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_headers_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_DPIPE_HEADERS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_headers_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -void -devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -int devlink_dpipe_table_counters_set(struct ynl_sock *ys, - struct devlink_dpipe_table_counters_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - if (req->_present.dpipe_table_counters_enabled) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, req->dpipe_table_counters_enabled); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ -/* DEVLINK_CMD_RESOURCE_SET - do */ -void devlink_resource_set_req_free(struct devlink_resource_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_resource_set(struct ynl_sock *ys, - struct devlink_resource_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.resource_id) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID, req->resource_id); - if (req->_present.resource_size) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ -/* DEVLINK_CMD_RESOURCE_DUMP - do */ -void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_resource_list_free(&rsp->resource_list); - free(rsp); -} - -int devlink_resource_dump_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_resource_dump_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RESOURCE_LIST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_list = 1; - - parg.rsp_policy = &devlink_dl_resource_list_nest; - parg.data = &dst->resource_list; - if (devlink_dl_resource_list_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_resource_dump_rsp * -devlink_resource_dump(struct ynl_sock *ys, - struct devlink_resource_dump_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_resource_dump_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_DUMP, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_resource_dump_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RESOURCE_DUMP; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_resource_dump_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_RELOAD ============== */ -/* DEVLINK_CMD_RELOAD - do */ -void devlink_reload_req_free(struct devlink_reload_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_reload_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_reload_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_actions_performed = 1; - memcpy(&dst->reload_actions_performed, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32)); - } - } - - return MNL_CB_OK; -} - -struct devlink_reload_rsp * -devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_reload_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RELOAD, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.reload_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_RELOAD_ACTION, req->reload_action); - if (req->_present.reload_limits) - mnl_attr_put(nlh, DEVLINK_ATTR_RELOAD_LIMITS, sizeof(struct nla_bitfield32), &req->reload_limits); - if (req->_present.netns_pid) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_PID, req->netns_pid); - if (req->_present.netns_fd) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_FD, req->netns_fd); - if (req->_present.netns_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_ID, req->netns_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_reload_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RELOAD; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_reload_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -void devlink_param_get_req_free(struct devlink_param_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->param_name); - free(req); -} - -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->param_name); - free(rsp); -} - -int devlink_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_param_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PARAM_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.param_name_len = len; - dst->param_name = malloc(len + 1); - memcpy(dst->param_name, mnl_attr_get_str(attr), len); - dst->param_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_param_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.param_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_param_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PARAM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_param_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PARAM_GET - dump */ -void devlink_param_get_list_free(struct devlink_param_get_list *rsp) -{ - struct devlink_param_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.param_name); - free(rsp); - } -} - -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_param_get_list); - yds.cb = devlink_param_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_PARAM_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_param_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PARAM_SET ============== */ -/* DEVLINK_CMD_PARAM_SET - do */ -void devlink_param_set_req_free(struct devlink_param_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->param_name); - free(req); -} - -int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.param_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); - if (req->_present.param_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, req->param_type); - if (req->_present.param_value_cmode) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -void devlink_region_get_req_free(struct devlink_region_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->region_name); - free(rsp); -} - -int devlink_region_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_region_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_region_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_REGION_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_region_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_REGION_GET - dump */ -void devlink_region_get_list_free(struct devlink_region_get_list *rsp) -{ - struct devlink_region_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.region_name); - free(rsp); - } -} - -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_region_get_list); - yds.cb = devlink_region_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_REGION_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_region_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_REGION_NEW ============== */ -/* DEVLINK_CMD_REGION_NEW - do */ -void devlink_region_new_req_free(struct devlink_region_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->region_name); - free(rsp); -} - -int devlink_region_new_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_new_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } else if (type == DEVLINK_ATTR_REGION_SNAPSHOT_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.region_snapshot_id = 1; - dst->region_snapshot_id = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_region_new_rsp * -devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_region_new_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_NEW, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_region_new_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_REGION_NEW; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_region_new_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_REGION_DEL ============== */ -/* DEVLINK_CMD_REGION_DEL - do */ -void devlink_region_del_req_free(struct devlink_region_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_DEL, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_REGION_READ ============== */ -/* DEVLINK_CMD_REGION_READ - dump */ -int devlink_region_read_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_read_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -void -devlink_region_read_rsp_list_free(struct devlink_region_read_rsp_list *rsp) -{ - struct devlink_region_read_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.region_name); - free(rsp); - } -} - -struct devlink_region_read_rsp_list * -devlink_region_read_dump(struct ynl_sock *ys, - struct devlink_region_read_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_region_read_rsp_list); - yds.cb = devlink_region_read_rsp_dump_parse; - yds.rsp_cmd = DEVLINK_CMD_REGION_READ; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_READ, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - if (req->_present.region_direct) - mnl_attr_put(nlh, DEVLINK_ATTR_REGION_DIRECT, 0, NULL); - if (req->_present.region_chunk_addr) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_ADDR, req->region_chunk_addr); - if (req->_present.region_chunk_len) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_LEN, req->region_chunk_len); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_region_read_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ -/* DEVLINK_CMD_PORT_PARAM_GET - do */ -void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_port_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_port_param_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_port_param_get_rsp * -devlink_port_param_get(struct ynl_sock *ys, - struct devlink_port_param_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_port_param_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_port_param_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_port_param_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PORT_PARAM_GET - dump */ -void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp) -{ - struct devlink_port_param_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_port_param_get_list * -devlink_port_param_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_port_param_get_list); - yds.cb = devlink_port_param_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_port_param_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */ -/* DEVLINK_CMD_PORT_PARAM_SET - do */ -void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_param_set(struct ynl_sock *ys, - struct devlink_port_param_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_INFO_GET ============== */ -/* DEVLINK_CMD_INFO_GET - do */ -void devlink_info_get_req_free(struct devlink_info_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp) -{ - unsigned int i; - - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->info_driver_name); - free(rsp->info_serial_number); - for (i = 0; i < rsp->n_info_version_fixed; i++) - devlink_dl_info_version_free(&rsp->info_version_fixed[i]); - free(rsp->info_version_fixed); - for (i = 0; i < rsp->n_info_version_running; i++) - devlink_dl_info_version_free(&rsp->info_version_running[i]); - free(rsp->info_version_running); - for (i = 0; i < rsp->n_info_version_stored; i++) - devlink_dl_info_version_free(&rsp->info_version_stored[i]); - free(rsp->info_version_stored); - free(rsp); -} - -int devlink_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - unsigned int n_info_version_running = 0; - unsigned int n_info_version_stored = 0; - unsigned int n_info_version_fixed = 0; - struct ynl_parse_arg *yarg = data; - struct devlink_info_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - dst = yarg->data; - parg.ys = yarg->ys; - - if (dst->info_version_fixed) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-fixed)"); - if (dst->info_version_running) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-running)"); - if (dst->info_version_stored) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-stored)"); - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_DRIVER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_driver_name_len = len; - dst->info_driver_name = malloc(len + 1); - memcpy(dst->info_driver_name, mnl_attr_get_str(attr), len); - dst->info_driver_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_SERIAL_NUMBER) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_serial_number_len = len; - dst->info_serial_number = malloc(len + 1); - memcpy(dst->info_serial_number, mnl_attr_get_str(attr), len); - dst->info_serial_number[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_VERSION_FIXED) { - n_info_version_fixed++; - } else if (type == DEVLINK_ATTR_INFO_VERSION_RUNNING) { - n_info_version_running++; - } else if (type == DEVLINK_ATTR_INFO_VERSION_STORED) { - n_info_version_stored++; - } - } - - if (n_info_version_fixed) { - dst->info_version_fixed = calloc(n_info_version_fixed, sizeof(*dst->info_version_fixed)); - dst->n_info_version_fixed = n_info_version_fixed; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_FIXED) { - parg.data = &dst->info_version_fixed[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_info_version_running) { - dst->info_version_running = calloc(n_info_version_running, sizeof(*dst->info_version_running)); - dst->n_info_version_running = n_info_version_running; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_RUNNING) { - parg.data = &dst->info_version_running[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_info_version_stored) { - dst->info_version_stored = calloc(n_info_version_stored, sizeof(*dst->info_version_stored)); - dst->n_info_version_stored = n_info_version_stored; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_STORED) { - parg.data = &dst->info_version_stored[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return MNL_CB_OK; -} - -struct devlink_info_get_rsp * -devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_info_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_info_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_INFO_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_info_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_INFO_GET - dump */ -void devlink_info_get_list_free(struct devlink_info_get_list *rsp) -{ - struct devlink_info_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - unsigned int i; - - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.info_driver_name); - free(rsp->obj.info_serial_number); - for (i = 0; i < rsp->obj.n_info_version_fixed; i++) - devlink_dl_info_version_free(&rsp->obj.info_version_fixed[i]); - free(rsp->obj.info_version_fixed); - for (i = 0; i < rsp->obj.n_info_version_running; i++) - devlink_dl_info_version_free(&rsp->obj.info_version_running[i]); - free(rsp->obj.info_version_running); - for (i = 0; i < rsp->obj.n_info_version_stored; i++) - devlink_dl_info_version_free(&rsp->obj.info_version_stored[i]); - free(rsp->obj.info_version_stored); - free(rsp); - } -} - -struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_info_get_list); - yds.cb = devlink_info_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_INFO_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_info_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -void -devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -void -devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->health_reporter_name); - free(rsp); -} - -int devlink_health_reporter_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_health_reporter_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_HEALTH_REPORTER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.health_reporter_name_len = len; - dst->health_reporter_name = malloc(len + 1); - memcpy(dst->health_reporter_name, mnl_attr_get_str(attr), len); - dst->health_reporter_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_health_reporter_get_rsp * -devlink_health_reporter_get(struct ynl_sock *ys, - struct devlink_health_reporter_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_health_reporter_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_health_reporter_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_health_reporter_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -void -devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp) -{ - struct devlink_health_reporter_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.health_reporter_name); - free(rsp); - } -} - -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_health_reporter_get_list); - yds.cb = devlink_health_reporter_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_health_reporter_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -void -devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_set(struct ynl_sock *ys, - struct devlink_health_reporter_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - if (req->_present.health_reporter_graceful_period) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, req->health_reporter_graceful_period); - if (req->_present.health_reporter_auto_recover) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, req->health_reporter_auto_recover); - if (req->_present.health_reporter_auto_dump) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -void -devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_recover(struct ynl_sock *ys, - struct devlink_health_reporter_recover_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -void -devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_diagnose(struct ynl_sock *ys, - struct devlink_health_reporter_diagnose_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -int devlink_health_reporter_dump_get_rsp_dump_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_health_reporter_dump_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_FMSG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg = 1; - - parg.rsp_policy = &devlink_dl_fmsg_nest; - parg.data = &dst->fmsg; - if (devlink_dl_fmsg_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -void -devlink_health_reporter_dump_get_rsp_list_free(struct devlink_health_reporter_dump_get_rsp_list *rsp) -{ - struct devlink_health_reporter_dump_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - devlink_dl_fmsg_free(&rsp->obj.fmsg); - free(rsp); - } -} - -struct devlink_health_reporter_dump_get_rsp_list * -devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_dump_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_health_reporter_dump_get_rsp_list); - yds.cb = devlink_health_reporter_dump_get_rsp_dump_parse; - yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_health_reporter_dump_get_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -void -devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_dump_clear(struct ynl_sock *ys, - struct devlink_health_reporter_dump_clear_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ -/* DEVLINK_CMD_FLASH_UPDATE - do */ -void devlink_flash_update_req_free(struct devlink_flash_update_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->flash_update_file_name); - free(req->flash_update_component); - free(req); -} - -int devlink_flash_update(struct ynl_sock *ys, - struct devlink_flash_update_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_FLASH_UPDATE, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.flash_update_file_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, req->flash_update_file_name); - if (req->_present.flash_update_component_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, req->flash_update_component); - if (req->_present.flash_update_overwrite_mask) - mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_GET ============== */ -/* DEVLINK_CMD_TRAP_GET - do */ -void devlink_trap_get_req_free(struct devlink_trap_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_name); - free(req); -} - -void devlink_trap_get_rsp_free(struct devlink_trap_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->trap_name); - free(rsp); -} - -int devlink_trap_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_trap_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.trap_name_len = len; - dst->trap_name = malloc(len + 1); - memcpy(dst->trap_name, mnl_attr_get_str(attr), len); - dst->trap_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_get_rsp * -devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, req->trap_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = 63; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_GET - dump */ -void devlink_trap_get_list_free(struct devlink_trap_get_list *rsp) -{ - struct devlink_trap_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.trap_name); - free(rsp); - } -} - -struct devlink_trap_get_list * -devlink_trap_get_dump(struct ynl_sock *ys, - struct devlink_trap_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_get_list); - yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = 63; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_SET ============== */ -/* DEVLINK_CMD_TRAP_SET - do */ -void devlink_trap_set_req_free(struct devlink_trap_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_name); - free(req); -} - -int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, req->trap_name); - if (req->_present.trap_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_GET - do */ -void devlink_trap_group_get_req_free(struct devlink_trap_group_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_group_name); - free(req); -} - -void devlink_trap_group_get_rsp_free(struct devlink_trap_group_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->trap_group_name); - free(rsp); -} - -int devlink_trap_group_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_trap_group_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_GROUP_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.trap_group_name_len = len; - dst->trap_group_name = malloc(len + 1); - memcpy(dst->trap_group_name, mnl_attr_get_str(attr), len); - dst->trap_group_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_group_get_rsp * -devlink_trap_group_get(struct ynl_sock *ys, - struct devlink_trap_group_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_group_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_group_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, req->trap_group_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = 67; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_group_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -void devlink_trap_group_get_list_free(struct devlink_trap_group_get_list *rsp) -{ - struct devlink_trap_group_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.trap_group_name); - free(rsp); - } -} - -struct devlink_trap_group_get_list * -devlink_trap_group_get_dump(struct ynl_sock *ys, - struct devlink_trap_group_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); - yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = 67; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_group_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_SET - do */ -void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_group_name); - free(req); -} - -int devlink_trap_group_set(struct ynl_sock *ys, - struct devlink_trap_group_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_group_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, req->trap_group_name); - if (req->_present.trap_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_GET - do */ -void -devlink_trap_policer_get_req_free(struct devlink_trap_policer_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_trap_policer_get_rsp_free(struct devlink_trap_policer_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_trap_policer_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_trap_policer_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_POLICER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.trap_policer_id = 1; - dst->trap_policer_id = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_policer_get_rsp * -devlink_trap_policer_get(struct ynl_sock *ys, - struct devlink_trap_policer_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_policer_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = 71; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_policer_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -void -devlink_trap_policer_get_list_free(struct devlink_trap_policer_get_list *rsp) -{ - struct devlink_trap_policer_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_trap_policer_get_list * -devlink_trap_policer_get_dump(struct ynl_sock *ys, - struct devlink_trap_policer_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); - yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = 71; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_policer_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_SET - do */ -void -devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_trap_policer_set(struct ynl_sock *ys, - struct devlink_trap_policer_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - if (req->_present.trap_policer_rate) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_RATE, req->trap_policer_rate); - if (req->_present.trap_policer_burst) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -void -devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_test(struct ynl_sock *ys, - struct devlink_health_reporter_test_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_TEST, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_GET ============== */ -/* DEVLINK_CMD_RATE_GET - do */ -void devlink_rate_get_req_free(struct devlink_rate_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req); -} - -void devlink_rate_get_rsp_free(struct devlink_rate_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->rate_node_name); - free(rsp); -} - -int devlink_rate_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_rate_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_RATE_NODE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.rate_node_name_len = len; - dst->rate_node_name = malloc(len + 1); - memcpy(dst->rate_node_name, mnl_attr_get_str(attr), len); - dst->rate_node_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_rate_get_rsp * -devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_rate_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = 76; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_rate_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_RATE_GET - dump */ -void devlink_rate_get_list_free(struct devlink_rate_get_list *rsp) -{ - struct devlink_rate_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.rate_node_name); - free(rsp); - } -} - -struct devlink_rate_get_list * -devlink_rate_get_dump(struct ynl_sock *ys, - struct devlink_rate_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_rate_get_list); - yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = 76; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_rate_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_RATE_SET ============== */ -/* DEVLINK_CMD_RATE_SET - do */ -void devlink_rate_set_req_free(struct devlink_rate_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req->rate_parent_node_name); - free(req); -} - -int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - if (req->_present.rate_tx_share) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); - if (req->_present.rate_tx_max) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); - if (req->_present.rate_tx_priority) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); - if (req->_present.rate_tx_weight) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); - if (req->_present.rate_parent_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_NEW ============== */ -/* DEVLINK_CMD_RATE_NEW - do */ -void devlink_rate_new_req_free(struct devlink_rate_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req->rate_parent_node_name); - free(req); -} - -int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_NEW, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - if (req->_present.rate_tx_share) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); - if (req->_present.rate_tx_max) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); - if (req->_present.rate_tx_priority) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); - if (req->_present.rate_tx_weight) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); - if (req->_present.rate_parent_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_DEL ============== */ -/* DEVLINK_CMD_RATE_DEL - do */ -void devlink_rate_del_req_free(struct devlink_rate_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req); -} - -int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_DEL, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_LINECARD_GET ============== */ -/* DEVLINK_CMD_LINECARD_GET - do */ -void devlink_linecard_get_req_free(struct devlink_linecard_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_linecard_get_rsp_free(struct devlink_linecard_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_linecard_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_linecard_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_LINECARD_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.linecard_index = 1; - dst->linecard_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_linecard_get_rsp * -devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_linecard_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.linecard_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_LINECARD_INDEX, req->linecard_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = 80; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_linecard_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_LINECARD_GET - dump */ -void devlink_linecard_get_list_free(struct devlink_linecard_get_list *rsp) -{ - struct devlink_linecard_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_linecard_get_list * -devlink_linecard_get_dump(struct ynl_sock *ys, - struct devlink_linecard_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_linecard_get_list); - yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = 80; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_linecard_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_LINECARD_SET ============== */ -/* DEVLINK_CMD_LINECARD_SET - do */ -void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->linecard_type); - free(req); -} - -int devlink_linecard_set(struct ynl_sock *ys, - struct devlink_linecard_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_LINECARD_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.linecard_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_LINECARD_INDEX, req->linecard_index); - if (req->_present.linecard_type_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ -/* DEVLINK_CMD_SELFTESTS_GET - do */ -void devlink_selftests_get_req_free(struct devlink_selftests_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_selftests_get_rsp_free(struct devlink_selftests_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_selftests_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_selftests_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_selftests_get_rsp * -devlink_selftests_get(struct ynl_sock *ys, - struct devlink_selftests_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_selftests_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_selftests_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SELFTESTS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_selftests_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SELFTESTS_GET - dump */ -void devlink_selftests_get_list_free(struct devlink_selftests_get_list *rsp) -{ - struct devlink_selftests_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_selftests_get_list * -devlink_selftests_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_selftests_get_list); - yds.cb = devlink_selftests_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SELFTESTS_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_selftests_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ -/* DEVLINK_CMD_SELFTESTS_RUN - do */ -void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req) -{ - free(req->bus_name); - free(req->dev_name); - devlink_dl_selftest_id_free(&req->selftests); - free(req); -} - -int devlink_selftests_run(struct ynl_sock *ys, - struct devlink_selftests_run_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_RUN, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.selftests) - devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -const struct ynl_family ynl_devlink_family = { - .name = "devlink", -}; diff --git a/tools/net/ynl/generated/devlink-user.h b/tools/net/ynl/generated/devlink-user.h deleted file mode 100644 index 1db4edc36eaa..000000000000 --- a/tools/net/ynl/generated/devlink-user.h +++ /dev/null @@ -1,5255 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/devlink.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_DEVLINK_GEN_H -#define _LINUX_DEVLINK_GEN_H - -#include -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_devlink_family; - -/* Enums */ -const char *devlink_op_str(int op); -const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value); -const char *devlink_port_type_str(enum devlink_port_type value); -const char *devlink_port_flavour_str(enum devlink_port_flavour value); -const char *devlink_port_fn_state_str(enum devlink_port_fn_state value); -const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value); -const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value); -const char * -devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value); -const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value); -const char * -devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value); -const char * -devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value); -const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value); -const char * -devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value); -const char * -devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value); -const char *devlink_resource_unit_str(enum devlink_resource_unit value); -const char *devlink_reload_action_str(enum devlink_reload_action value); -const char *devlink_param_cmode_str(enum devlink_param_cmode value); -const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value); -const char *devlink_trap_action_str(enum devlink_trap_action value); - -/* Common nested types */ -struct devlink_dl_dpipe_match { - struct { - __u32 dpipe_match_type:1; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_index:1; - __u32 dpipe_field_id:1; - } _present; - - enum devlink_dpipe_match_type dpipe_match_type; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - __u32 dpipe_header_index; - __u32 dpipe_field_id; -}; - -struct devlink_dl_dpipe_match_value { - struct { - __u32 dpipe_value_len; - __u32 dpipe_value_mask_len; - __u32 dpipe_value_mapping:1; - } _present; - - unsigned int n_dpipe_match; - struct devlink_dl_dpipe_match *dpipe_match; - void *dpipe_value; - void *dpipe_value_mask; - __u32 dpipe_value_mapping; -}; - -struct devlink_dl_dpipe_action { - struct { - __u32 dpipe_action_type:1; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_index:1; - __u32 dpipe_field_id:1; - } _present; - - enum devlink_dpipe_action_type dpipe_action_type; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - __u32 dpipe_header_index; - __u32 dpipe_field_id; -}; - -struct devlink_dl_dpipe_action_value { - struct { - __u32 dpipe_value_len; - __u32 dpipe_value_mask_len; - __u32 dpipe_value_mapping:1; - } _present; - - unsigned int n_dpipe_action; - struct devlink_dl_dpipe_action *dpipe_action; - void *dpipe_value; - void *dpipe_value_mask; - __u32 dpipe_value_mapping; -}; - -struct devlink_dl_dpipe_field { - struct { - __u32 dpipe_field_name_len; - __u32 dpipe_field_id:1; - __u32 dpipe_field_bitwidth:1; - __u32 dpipe_field_mapping_type:1; - } _present; - - char *dpipe_field_name; - __u32 dpipe_field_id; - __u32 dpipe_field_bitwidth; - enum devlink_dpipe_field_mapping_type dpipe_field_mapping_type; -}; - -struct devlink_dl_resource { - struct { - __u32 resource_name_len; - __u32 resource_id:1; - __u32 resource_size:1; - __u32 resource_size_new:1; - __u32 resource_size_valid:1; - __u32 resource_size_min:1; - __u32 resource_size_max:1; - __u32 resource_size_gran:1; - __u32 resource_unit:1; - __u32 resource_occ:1; - } _present; - - char *resource_name; - __u64 resource_id; - __u64 resource_size; - __u64 resource_size_new; - __u8 resource_size_valid; - __u64 resource_size_min; - __u64 resource_size_max; - __u64 resource_size_gran; - enum devlink_resource_unit resource_unit; - __u64 resource_occ; -}; - -struct devlink_dl_info_version { - struct { - __u32 info_version_name_len; - __u32 info_version_value_len; - } _present; - - char *info_version_name; - char *info_version_value; -}; - -struct devlink_dl_fmsg { - struct { - __u32 fmsg_obj_nest_start:1; - __u32 fmsg_pair_nest_start:1; - __u32 fmsg_arr_nest_start:1; - __u32 fmsg_nest_end:1; - __u32 fmsg_obj_name_len; - } _present; - - char *fmsg_obj_name; -}; - -struct devlink_dl_port_function { - struct { - __u32 hw_addr_len; - __u32 state:1; - __u32 opstate:1; - __u32 caps:1; - } _present; - - void *hw_addr; - enum devlink_port_fn_state state; - enum devlink_port_fn_opstate opstate; - struct nla_bitfield32 caps; -}; - -struct devlink_dl_reload_stats_entry { - struct { - __u32 reload_stats_limit:1; - __u32 reload_stats_value:1; - } _present; - - __u8 reload_stats_limit; - __u32 reload_stats_value; -}; - -struct devlink_dl_reload_act_stats { - unsigned int n_reload_stats_entry; - struct devlink_dl_reload_stats_entry *reload_stats_entry; -}; - -struct devlink_dl_selftest_id { - struct { - __u32 flash:1; - } _present; -}; - -struct devlink_dl_dpipe_table_matches { - unsigned int n_dpipe_match; - struct devlink_dl_dpipe_match *dpipe_match; -}; - -struct devlink_dl_dpipe_table_actions { - unsigned int n_dpipe_action; - struct devlink_dl_dpipe_action *dpipe_action; -}; - -struct devlink_dl_dpipe_entry_match_values { - unsigned int n_dpipe_match_value; - struct devlink_dl_dpipe_match_value *dpipe_match_value; -}; - -struct devlink_dl_dpipe_entry_action_values { - unsigned int n_dpipe_action_value; - struct devlink_dl_dpipe_action_value *dpipe_action_value; -}; - -struct devlink_dl_dpipe_header_fields { - unsigned int n_dpipe_field; - struct devlink_dl_dpipe_field *dpipe_field; -}; - -struct devlink_dl_resource_list { - unsigned int n_resource; - struct devlink_dl_resource *resource; -}; - -struct devlink_dl_reload_act_info { - struct { - __u32 reload_action:1; - } _present; - - enum devlink_reload_action reload_action; - unsigned int n_reload_action_stats; - struct devlink_dl_reload_act_stats *reload_action_stats; -}; - -struct devlink_dl_dpipe_table { - struct { - __u32 dpipe_table_name_len; - __u32 dpipe_table_size:1; - __u32 dpipe_table_matches:1; - __u32 dpipe_table_actions:1; - __u32 dpipe_table_counters_enabled:1; - __u32 dpipe_table_resource_id:1; - __u32 dpipe_table_resource_units:1; - } _present; - - char *dpipe_table_name; - __u64 dpipe_table_size; - struct devlink_dl_dpipe_table_matches dpipe_table_matches; - struct devlink_dl_dpipe_table_actions dpipe_table_actions; - __u8 dpipe_table_counters_enabled; - __u64 dpipe_table_resource_id; - __u64 dpipe_table_resource_units; -}; - -struct devlink_dl_dpipe_entry { - struct { - __u32 dpipe_entry_index:1; - __u32 dpipe_entry_match_values:1; - __u32 dpipe_entry_action_values:1; - __u32 dpipe_entry_counter:1; - } _present; - - __u64 dpipe_entry_index; - struct devlink_dl_dpipe_entry_match_values dpipe_entry_match_values; - struct devlink_dl_dpipe_entry_action_values dpipe_entry_action_values; - __u64 dpipe_entry_counter; -}; - -struct devlink_dl_dpipe_header { - struct { - __u32 dpipe_header_name_len; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_fields:1; - } _present; - - char *dpipe_header_name; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - struct devlink_dl_dpipe_header_fields dpipe_header_fields; -}; - -struct devlink_dl_reload_stats { - unsigned int n_reload_action_info; - struct devlink_dl_reload_act_info *reload_action_info; -}; - -struct devlink_dl_dpipe_tables { - unsigned int n_dpipe_table; - struct devlink_dl_dpipe_table *dpipe_table; -}; - -struct devlink_dl_dpipe_entries { - unsigned int n_dpipe_entry; - struct devlink_dl_dpipe_entry *dpipe_entry; -}; - -struct devlink_dl_dpipe_headers { - unsigned int n_dpipe_header; - struct devlink_dl_dpipe_header *dpipe_header; -}; - -struct devlink_dl_dev_stats { - struct { - __u32 reload_stats:1; - __u32 remote_reload_stats:1; - } _present; - - struct devlink_dl_reload_stats reload_stats; - struct devlink_dl_reload_stats remote_reload_stats; -}; - -/* ============== DEVLINK_CMD_GET ============== */ -/* DEVLINK_CMD_GET - do */ -struct devlink_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_get_req *devlink_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_get_req)); -} -void devlink_get_req_free(struct devlink_get_req *req); - -static inline void -devlink_get_req_set_bus_name(struct devlink_get_req *req, const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_get_req_set_dev_name(struct devlink_get_req *req, const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_failed:1; - __u32 dev_stats:1; - } _present; - - char *bus_name; - char *dev_name; - __u8 reload_failed; - struct devlink_dl_dev_stats dev_stats; -}; - -void devlink_get_rsp_free(struct devlink_get_rsp *rsp); - -/* - * Get devlink instances. - */ -struct devlink_get_rsp * -devlink_get(struct ynl_sock *ys, struct devlink_get_req *req); - -/* DEVLINK_CMD_GET - dump */ -struct devlink_get_list { - struct devlink_get_list *next; - struct devlink_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_get_list_free(struct devlink_get_list *rsp); - -struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_PORT_GET ============== */ -/* DEVLINK_CMD_PORT_GET - do */ -struct devlink_port_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_get_req *devlink_port_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_get_req)); -} -void devlink_port_get_req_free(struct devlink_port_get_req *req); - -static inline void -devlink_port_get_req_set_bus_name(struct devlink_port_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_get_req_set_dev_name(struct devlink_port_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_get_req_set_port_index(struct devlink_port_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_port_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp); - -/* - * Get devlink port instances. - */ -struct devlink_port_get_rsp * -devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req); - -/* DEVLINK_CMD_PORT_GET - dump */ -struct devlink_port_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_port_get_req_dump * -devlink_port_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_get_req_dump)); -} -void devlink_port_get_req_dump_free(struct devlink_port_get_req_dump *req); - -static inline void -devlink_port_get_req_dump_set_bus_name(struct devlink_port_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_get_req_dump_set_dev_name(struct devlink_port_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_port_get_rsp_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -struct devlink_port_get_rsp_list { - struct devlink_port_get_rsp_list *next; - struct devlink_port_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp); - -struct devlink_port_get_rsp_list * -devlink_port_get_dump(struct ynl_sock *ys, - struct devlink_port_get_req_dump *req); - -/* ============== DEVLINK_CMD_PORT_SET ============== */ -/* DEVLINK_CMD_PORT_SET - do */ -struct devlink_port_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_type:1; - __u32 port_function:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - enum devlink_port_type port_type; - struct devlink_dl_port_function port_function; -}; - -static inline struct devlink_port_set_req *devlink_port_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_set_req)); -} -void devlink_port_set_req_free(struct devlink_port_set_req *req); - -static inline void -devlink_port_set_req_set_bus_name(struct devlink_port_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_set_req_set_dev_name(struct devlink_port_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_set_req_set_port_index(struct devlink_port_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_set_req_set_port_type(struct devlink_port_set_req *req, - enum devlink_port_type port_type) -{ - req->_present.port_type = 1; - req->port_type = port_type; -} -static inline void -devlink_port_set_req_set_port_function_hw_addr(struct devlink_port_set_req *req, - const void *hw_addr, size_t len) -{ - free(req->port_function.hw_addr); - req->port_function._present.hw_addr_len = len; - req->port_function.hw_addr = malloc(req->port_function._present.hw_addr_len); - memcpy(req->port_function.hw_addr, hw_addr, req->port_function._present.hw_addr_len); -} -static inline void -devlink_port_set_req_set_port_function_state(struct devlink_port_set_req *req, - enum devlink_port_fn_state state) -{ - req->_present.port_function = 1; - req->port_function._present.state = 1; - req->port_function.state = state; -} -static inline void -devlink_port_set_req_set_port_function_opstate(struct devlink_port_set_req *req, - enum devlink_port_fn_opstate opstate) -{ - req->_present.port_function = 1; - req->port_function._present.opstate = 1; - req->port_function.opstate = opstate; -} -static inline void -devlink_port_set_req_set_port_function_caps(struct devlink_port_set_req *req, - struct nla_bitfield32 *caps) -{ - req->_present.port_function = 1; - req->port_function._present.caps = 1; - memcpy(&req->port_function.caps, caps, sizeof(struct nla_bitfield32)); -} - -/* - * Set devlink port instances. - */ -int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req); - -/* ============== DEVLINK_CMD_PORT_NEW ============== */ -/* DEVLINK_CMD_PORT_NEW - do */ -struct devlink_port_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_flavour:1; - __u32 port_pci_pf_number:1; - __u32 port_pci_sf_number:1; - __u32 port_controller_number:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - enum devlink_port_flavour port_flavour; - __u16 port_pci_pf_number; - __u32 port_pci_sf_number; - __u32 port_controller_number; -}; - -static inline struct devlink_port_new_req *devlink_port_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_new_req)); -} -void devlink_port_new_req_free(struct devlink_port_new_req *req); - -static inline void -devlink_port_new_req_set_bus_name(struct devlink_port_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_new_req_set_dev_name(struct devlink_port_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_new_req_set_port_index(struct devlink_port_new_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_new_req_set_port_flavour(struct devlink_port_new_req *req, - enum devlink_port_flavour port_flavour) -{ - req->_present.port_flavour = 1; - req->port_flavour = port_flavour; -} -static inline void -devlink_port_new_req_set_port_pci_pf_number(struct devlink_port_new_req *req, - __u16 port_pci_pf_number) -{ - req->_present.port_pci_pf_number = 1; - req->port_pci_pf_number = port_pci_pf_number; -} -static inline void -devlink_port_new_req_set_port_pci_sf_number(struct devlink_port_new_req *req, - __u32 port_pci_sf_number) -{ - req->_present.port_pci_sf_number = 1; - req->port_pci_sf_number = port_pci_sf_number; -} -static inline void -devlink_port_new_req_set_port_controller_number(struct devlink_port_new_req *req, - __u32 port_controller_number) -{ - req->_present.port_controller_number = 1; - req->port_controller_number = port_controller_number; -} - -struct devlink_port_new_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp); - -/* - * Create devlink port instances. - */ -struct devlink_port_new_rsp * -devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req); - -/* ============== DEVLINK_CMD_PORT_DEL ============== */ -/* DEVLINK_CMD_PORT_DEL - do */ -struct devlink_port_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_del_req *devlink_port_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_del_req)); -} -void devlink_port_del_req_free(struct devlink_port_del_req *req); - -static inline void -devlink_port_del_req_set_bus_name(struct devlink_port_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_del_req_set_dev_name(struct devlink_port_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_del_req_set_port_index(struct devlink_port_del_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -/* - * Delete devlink port instances. - */ -int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req); - -/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ -/* DEVLINK_CMD_PORT_SPLIT - do */ -struct devlink_port_split_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_split_count:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 port_split_count; -}; - -static inline struct devlink_port_split_req *devlink_port_split_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_split_req)); -} -void devlink_port_split_req_free(struct devlink_port_split_req *req); - -static inline void -devlink_port_split_req_set_bus_name(struct devlink_port_split_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_split_req_set_dev_name(struct devlink_port_split_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_split_req_set_port_index(struct devlink_port_split_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_split_req_set_port_split_count(struct devlink_port_split_req *req, - __u32 port_split_count) -{ - req->_present.port_split_count = 1; - req->port_split_count = port_split_count; -} - -/* - * Split devlink port instances. - */ -int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req); - -/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ -/* DEVLINK_CMD_PORT_UNSPLIT - do */ -struct devlink_port_unsplit_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_unsplit_req * -devlink_port_unsplit_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_unsplit_req)); -} -void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req); - -static inline void -devlink_port_unsplit_req_set_bus_name(struct devlink_port_unsplit_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_unsplit_req_set_dev_name(struct devlink_port_unsplit_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_unsplit_req_set_port_index(struct devlink_port_unsplit_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -/* - * Unplit devlink port instances. - */ -int devlink_port_unsplit(struct ynl_sock *ys, - struct devlink_port_unsplit_req *req); - -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -struct devlink_sb_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_get_req *devlink_sb_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_get_req)); -} -void devlink_sb_get_req_free(struct devlink_sb_get_req *req); - -static inline void -devlink_sb_get_req_set_bus_name(struct devlink_sb_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_get_req_set_dev_name(struct devlink_sb_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_get_req_set_sb_index(struct devlink_sb_get_req *req, __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -struct devlink_sb_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp); - -/* - * Get shared buffer instances. - */ -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req); - -/* DEVLINK_CMD_SB_GET - dump */ -struct devlink_sb_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_get_req_dump * -devlink_sb_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_get_req_dump)); -} -void devlink_sb_get_req_dump_free(struct devlink_sb_get_req_dump *req); - -static inline void -devlink_sb_get_req_dump_set_bus_name(struct devlink_sb_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_get_req_dump_set_dev_name(struct devlink_sb_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_get_list { - struct devlink_sb_get_list *next; - struct devlink_sb_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp); - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -struct devlink_sb_pool_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; -}; - -static inline struct devlink_sb_pool_get_req * -devlink_sb_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_get_req)); -} -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req); - -static inline void -devlink_sb_pool_get_req_set_bus_name(struct devlink_sb_pool_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_set_dev_name(struct devlink_sb_pool_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_set_sb_index(struct devlink_sb_pool_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_pool_get_req_set_sb_pool_index(struct devlink_sb_pool_get_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} - -struct devlink_sb_pool_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; -}; - -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp); - -/* - * Get shared buffer pool instances. - */ -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req); - -/* DEVLINK_CMD_SB_POOL_GET - dump */ -struct devlink_sb_pool_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_pool_get_req_dump * -devlink_sb_pool_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_get_req_dump)); -} -void -devlink_sb_pool_get_req_dump_free(struct devlink_sb_pool_get_req_dump *req); - -static inline void -devlink_sb_pool_get_req_dump_set_bus_name(struct devlink_sb_pool_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_dump_set_dev_name(struct devlink_sb_pool_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_pool_get_list { - struct devlink_sb_pool_get_list *next; - struct devlink_sb_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp); - -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ -/* DEVLINK_CMD_SB_POOL_SET - do */ -struct devlink_sb_pool_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_pool_threshold_type:1; - __u32 sb_pool_size:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; - enum devlink_sb_threshold_type sb_pool_threshold_type; - __u32 sb_pool_size; -}; - -static inline struct devlink_sb_pool_set_req * -devlink_sb_pool_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_set_req)); -} -void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req); - -static inline void -devlink_sb_pool_set_req_set_bus_name(struct devlink_sb_pool_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_set_req_set_dev_name(struct devlink_sb_pool_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_pool_set_req_set_sb_index(struct devlink_sb_pool_set_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_index(struct devlink_sb_pool_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_threshold_type(struct devlink_sb_pool_set_req *req, - enum devlink_sb_threshold_type sb_pool_threshold_type) -{ - req->_present.sb_pool_threshold_type = 1; - req->sb_pool_threshold_type = sb_pool_threshold_type; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_size(struct devlink_sb_pool_set_req *req, - __u32 sb_pool_size) -{ - req->_present.sb_pool_size = 1; - req->sb_pool_size = sb_pool_size; -} - -/* - * Set shared buffer pool instances. - */ -int devlink_sb_pool_set(struct ynl_sock *ys, - struct devlink_sb_pool_set_req *req); - -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -struct devlink_sb_port_pool_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; -}; - -static inline struct devlink_sb_port_pool_get_req * -devlink_sb_port_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req)); -} -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req); - -static inline void -devlink_sb_port_pool_get_req_set_bus_name(struct devlink_sb_port_pool_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_set_dev_name(struct devlink_sb_port_pool_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_set_port_index(struct devlink_sb_port_pool_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_port_pool_get_req_set_sb_index(struct devlink_sb_port_pool_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_port_pool_get_req_set_sb_pool_index(struct devlink_sb_port_pool_get_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} - -struct devlink_sb_port_pool_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; -}; - -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp); - -/* - * Get shared buffer port-pool combinations and threshold. - */ -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req); - -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -struct devlink_sb_port_pool_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_port_pool_get_req_dump * -devlink_sb_port_pool_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req_dump)); -} -void -devlink_sb_port_pool_get_req_dump_free(struct devlink_sb_port_pool_get_req_dump *req); - -static inline void -devlink_sb_port_pool_get_req_dump_set_bus_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_dump_set_dev_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_port_pool_get_list { - struct devlink_sb_port_pool_get_list *next; - struct devlink_sb_port_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp); - -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -struct devlink_sb_port_pool_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_threshold:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; - __u32 sb_threshold; -}; - -static inline struct devlink_sb_port_pool_set_req * -devlink_sb_port_pool_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_set_req)); -} -void -devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req); - -static inline void -devlink_sb_port_pool_set_req_set_bus_name(struct devlink_sb_port_pool_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_set_req_set_dev_name(struct devlink_sb_port_pool_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_port_pool_set_req_set_port_index(struct devlink_sb_port_pool_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_index(struct devlink_sb_port_pool_set_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_pool_index(struct devlink_sb_port_pool_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_threshold(struct devlink_sb_port_pool_set_req *req, - __u32 sb_threshold) -{ - req->_present.sb_threshold = 1; - req->sb_threshold = sb_threshold; -} - -/* - * Set shared buffer port-pool combinations and threshold. - */ -int devlink_sb_port_pool_set(struct ynl_sock *ys, - struct devlink_sb_port_pool_set_req *req); - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -struct devlink_sb_tc_pool_bind_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; -}; - -static inline struct devlink_sb_tc_pool_bind_get_req * -devlink_sb_tc_pool_bind_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req)); -} -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req); - -static inline void -devlink_sb_tc_pool_bind_get_req_set_bus_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_dev_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_port_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_get_req *req, - enum devlink_sb_pool_type sb_pool_type) -{ - req->_present.sb_pool_type = 1; - req->sb_pool_type = sb_pool_type; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u16 sb_tc_index) -{ - req->_present.sb_tc_index = 1; - req->sb_tc_index = sb_tc_index; -} - -struct devlink_sb_tc_pool_bind_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; -}; - -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp); - -/* - * Get shared buffer port-TC to pool bindings and threshold. - */ -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req); - -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -struct devlink_sb_tc_pool_bind_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_tc_pool_bind_get_req_dump * -devlink_sb_tc_pool_bind_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req_dump)); -} -void -devlink_sb_tc_pool_bind_get_req_dump_free(struct devlink_sb_tc_pool_bind_get_req_dump *req); - -static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_bus_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_dev_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_tc_pool_bind_get_list { - struct devlink_sb_tc_pool_bind_get_list *next; - struct devlink_sb_tc_pool_bind_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp); - -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -struct devlink_sb_tc_pool_bind_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - __u32 sb_threshold:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; - __u32 sb_threshold; -}; - -static inline struct devlink_sb_tc_pool_bind_set_req * -devlink_sb_tc_pool_bind_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_set_req)); -} -void -devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req); - -static inline void -devlink_sb_tc_pool_bind_set_req_set_bus_name(struct devlink_sb_tc_pool_bind_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_dev_name(struct devlink_sb_tc_pool_bind_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_port_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_pool_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_set_req *req, - enum devlink_sb_pool_type sb_pool_type) -{ - req->_present.sb_pool_type = 1; - req->sb_pool_type = sb_pool_type; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u16 sb_tc_index) -{ - req->_present.sb_tc_index = 1; - req->sb_tc_index = sb_tc_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_threshold(struct devlink_sb_tc_pool_bind_set_req *req, - __u32 sb_threshold) -{ - req->_present.sb_threshold = 1; - req->sb_threshold = sb_threshold; -} - -/* - * Set shared buffer port-TC to pool bindings and threshold. - */ -int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_set_req *req); - -/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ -/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -struct devlink_sb_occ_snapshot_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_occ_snapshot_req * -devlink_sb_occ_snapshot_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_occ_snapshot_req)); -} -void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req); - -static inline void -devlink_sb_occ_snapshot_req_set_bus_name(struct devlink_sb_occ_snapshot_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_occ_snapshot_req_set_dev_name(struct devlink_sb_occ_snapshot_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_occ_snapshot_req_set_sb_index(struct devlink_sb_occ_snapshot_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -/* - * Take occupancy snapshot of shared buffer. - */ -int devlink_sb_occ_snapshot(struct ynl_sock *ys, - struct devlink_sb_occ_snapshot_req *req); - -/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ -/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -struct devlink_sb_occ_max_clear_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_occ_max_clear_req * -devlink_sb_occ_max_clear_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_occ_max_clear_req)); -} -void -devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req); - -static inline void -devlink_sb_occ_max_clear_req_set_bus_name(struct devlink_sb_occ_max_clear_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_occ_max_clear_req_set_dev_name(struct devlink_sb_occ_max_clear_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_occ_max_clear_req_set_sb_index(struct devlink_sb_occ_max_clear_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -/* - * Clear occupancy watermarks of shared buffer. - */ -int devlink_sb_occ_max_clear(struct ynl_sock *ys, - struct devlink_sb_occ_max_clear_req *req); - -/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ -/* DEVLINK_CMD_ESWITCH_GET - do */ -struct devlink_eswitch_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_eswitch_get_req * -devlink_eswitch_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_eswitch_get_req)); -} -void devlink_eswitch_get_req_free(struct devlink_eswitch_get_req *req); - -static inline void -devlink_eswitch_get_req_set_bus_name(struct devlink_eswitch_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_eswitch_get_req_set_dev_name(struct devlink_eswitch_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_eswitch_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 eswitch_mode:1; - __u32 eswitch_inline_mode:1; - __u32 eswitch_encap_mode:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_eswitch_mode eswitch_mode; - enum devlink_eswitch_inline_mode eswitch_inline_mode; - enum devlink_eswitch_encap_mode eswitch_encap_mode; -}; - -void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp); - -/* - * Get eswitch attributes. - */ -struct devlink_eswitch_get_rsp * -devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req); - -/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ -/* DEVLINK_CMD_ESWITCH_SET - do */ -struct devlink_eswitch_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 eswitch_mode:1; - __u32 eswitch_inline_mode:1; - __u32 eswitch_encap_mode:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_eswitch_mode eswitch_mode; - enum devlink_eswitch_inline_mode eswitch_inline_mode; - enum devlink_eswitch_encap_mode eswitch_encap_mode; -}; - -static inline struct devlink_eswitch_set_req * -devlink_eswitch_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_eswitch_set_req)); -} -void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req); - -static inline void -devlink_eswitch_set_req_set_bus_name(struct devlink_eswitch_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_eswitch_set_req_set_dev_name(struct devlink_eswitch_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_eswitch_set_req_set_eswitch_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_mode eswitch_mode) -{ - req->_present.eswitch_mode = 1; - req->eswitch_mode = eswitch_mode; -} -static inline void -devlink_eswitch_set_req_set_eswitch_inline_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_inline_mode eswitch_inline_mode) -{ - req->_present.eswitch_inline_mode = 1; - req->eswitch_inline_mode = eswitch_inline_mode; -} -static inline void -devlink_eswitch_set_req_set_eswitch_encap_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_encap_mode eswitch_encap_mode) -{ - req->_present.eswitch_encap_mode = 1; - req->eswitch_encap_mode = eswitch_encap_mode; -} - -/* - * Set eswitch attributes. - */ -int devlink_eswitch_set(struct ynl_sock *ys, - struct devlink_eswitch_set_req *req); - -/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -struct devlink_dpipe_table_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; -}; - -static inline struct devlink_dpipe_table_get_req * -devlink_dpipe_table_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_table_get_req)); -} -void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req); - -static inline void -devlink_dpipe_table_get_req_set_bus_name(struct devlink_dpipe_table_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_table_get_req_set_dev_name(struct devlink_dpipe_table_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_table_get_req_set_dpipe_table_name(struct devlink_dpipe_table_get_req *req, - const char *dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} - -struct devlink_dpipe_table_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_tables:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_tables dpipe_tables; -}; - -void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp); - -/* - * Get dpipe table attributes. - */ -struct devlink_dpipe_table_get_rsp * -devlink_dpipe_table_get(struct ynl_sock *ys, - struct devlink_dpipe_table_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ -/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -struct devlink_dpipe_entries_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; -}; - -static inline struct devlink_dpipe_entries_get_req * -devlink_dpipe_entries_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_entries_get_req)); -} -void -devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req); - -static inline void -devlink_dpipe_entries_get_req_set_bus_name(struct devlink_dpipe_entries_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_entries_get_req_set_dev_name(struct devlink_dpipe_entries_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_entries_get_req_set_dpipe_table_name(struct devlink_dpipe_entries_get_req *req, - const char *dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} - -struct devlink_dpipe_entries_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_entries:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_entries dpipe_entries; -}; - -void -devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp); - -/* - * Get dpipe entries attributes. - */ -struct devlink_dpipe_entries_get_rsp * -devlink_dpipe_entries_get(struct ynl_sock *ys, - struct devlink_dpipe_entries_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ -/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -struct devlink_dpipe_headers_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_dpipe_headers_get_req * -devlink_dpipe_headers_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_headers_get_req)); -} -void -devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req); - -static inline void -devlink_dpipe_headers_get_req_set_bus_name(struct devlink_dpipe_headers_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_headers_get_req_set_dev_name(struct devlink_dpipe_headers_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_dpipe_headers_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_headers:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_headers dpipe_headers; -}; - -void -devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp); - -/* - * Get dpipe headers attributes. - */ -struct devlink_dpipe_headers_get_rsp * -devlink_dpipe_headers_get(struct ynl_sock *ys, - struct devlink_dpipe_headers_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -struct devlink_dpipe_table_counters_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - __u32 dpipe_table_counters_enabled:1; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; - __u8 dpipe_table_counters_enabled; -}; - -static inline struct devlink_dpipe_table_counters_set_req * -devlink_dpipe_table_counters_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_table_counters_set_req)); -} -void -devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req); - -static inline void -devlink_dpipe_table_counters_set_req_set_bus_name(struct devlink_dpipe_table_counters_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dev_name(struct devlink_dpipe_table_counters_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dpipe_table_name(struct devlink_dpipe_table_counters_set_req *req, - const char *dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dpipe_table_counters_enabled(struct devlink_dpipe_table_counters_set_req *req, - __u8 dpipe_table_counters_enabled) -{ - req->_present.dpipe_table_counters_enabled = 1; - req->dpipe_table_counters_enabled = dpipe_table_counters_enabled; -} - -/* - * Set dpipe counter attributes. - */ -int devlink_dpipe_table_counters_set(struct ynl_sock *ys, - struct devlink_dpipe_table_counters_set_req *req); - -/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ -/* DEVLINK_CMD_RESOURCE_SET - do */ -struct devlink_resource_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 resource_id:1; - __u32 resource_size:1; - } _present; - - char *bus_name; - char *dev_name; - __u64 resource_id; - __u64 resource_size; -}; - -static inline struct devlink_resource_set_req * -devlink_resource_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_resource_set_req)); -} -void devlink_resource_set_req_free(struct devlink_resource_set_req *req); - -static inline void -devlink_resource_set_req_set_bus_name(struct devlink_resource_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_resource_set_req_set_dev_name(struct devlink_resource_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_resource_set_req_set_resource_id(struct devlink_resource_set_req *req, - __u64 resource_id) -{ - req->_present.resource_id = 1; - req->resource_id = resource_id; -} -static inline void -devlink_resource_set_req_set_resource_size(struct devlink_resource_set_req *req, - __u64 resource_size) -{ - req->_present.resource_size = 1; - req->resource_size = resource_size; -} - -/* - * Set resource attributes. - */ -int devlink_resource_set(struct ynl_sock *ys, - struct devlink_resource_set_req *req); - -/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ -/* DEVLINK_CMD_RESOURCE_DUMP - do */ -struct devlink_resource_dump_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_resource_dump_req * -devlink_resource_dump_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_resource_dump_req)); -} -void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req); - -static inline void -devlink_resource_dump_req_set_bus_name(struct devlink_resource_dump_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_resource_dump_req_set_dev_name(struct devlink_resource_dump_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_resource_dump_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 resource_list:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_resource_list resource_list; -}; - -void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp); - -/* - * Get resource attributes. - */ -struct devlink_resource_dump_rsp * -devlink_resource_dump(struct ynl_sock *ys, - struct devlink_resource_dump_req *req); - -/* ============== DEVLINK_CMD_RELOAD ============== */ -/* DEVLINK_CMD_RELOAD - do */ -struct devlink_reload_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_action:1; - __u32 reload_limits:1; - __u32 netns_pid:1; - __u32 netns_fd:1; - __u32 netns_id:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_reload_action reload_action; - struct nla_bitfield32 reload_limits; - __u32 netns_pid; - __u32 netns_fd; - __u32 netns_id; -}; - -static inline struct devlink_reload_req *devlink_reload_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_reload_req)); -} -void devlink_reload_req_free(struct devlink_reload_req *req); - -static inline void -devlink_reload_req_set_bus_name(struct devlink_reload_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_reload_req_set_dev_name(struct devlink_reload_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_reload_req_set_reload_action(struct devlink_reload_req *req, - enum devlink_reload_action reload_action) -{ - req->_present.reload_action = 1; - req->reload_action = reload_action; -} -static inline void -devlink_reload_req_set_reload_limits(struct devlink_reload_req *req, - struct nla_bitfield32 *reload_limits) -{ - req->_present.reload_limits = 1; - memcpy(&req->reload_limits, reload_limits, sizeof(struct nla_bitfield32)); -} -static inline void -devlink_reload_req_set_netns_pid(struct devlink_reload_req *req, - __u32 netns_pid) -{ - req->_present.netns_pid = 1; - req->netns_pid = netns_pid; -} -static inline void -devlink_reload_req_set_netns_fd(struct devlink_reload_req *req, __u32 netns_fd) -{ - req->_present.netns_fd = 1; - req->netns_fd = netns_fd; -} -static inline void -devlink_reload_req_set_netns_id(struct devlink_reload_req *req, __u32 netns_id) -{ - req->_present.netns_id = 1; - req->netns_id = netns_id; -} - -struct devlink_reload_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_actions_performed:1; - } _present; - - char *bus_name; - char *dev_name; - struct nla_bitfield32 reload_actions_performed; -}; - -void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp); - -/* - * Reload devlink. - */ -struct devlink_reload_rsp * -devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req); - -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -struct devlink_param_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; -}; - -static inline struct devlink_param_get_req *devlink_param_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_get_req)); -} -void devlink_param_get_req_free(struct devlink_param_get_req *req); - -static inline void -devlink_param_get_req_set_bus_name(struct devlink_param_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_get_req_set_dev_name(struct devlink_param_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_param_get_req_set_param_name(struct devlink_param_get_req *req, - const char *param_name) -{ - free(req->param_name); - req->_present.param_name_len = strlen(param_name); - req->param_name = malloc(req->_present.param_name_len + 1); - memcpy(req->param_name, param_name, req->_present.param_name_len); - req->param_name[req->_present.param_name_len] = 0; -} - -struct devlink_param_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; -}; - -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp); - -/* - * Get param instances. - */ -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req); - -/* DEVLINK_CMD_PARAM_GET - dump */ -struct devlink_param_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_param_get_req_dump * -devlink_param_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_get_req_dump)); -} -void devlink_param_get_req_dump_free(struct devlink_param_get_req_dump *req); - -static inline void -devlink_param_get_req_dump_set_bus_name(struct devlink_param_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_get_req_dump_set_dev_name(struct devlink_param_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_param_get_list { - struct devlink_param_get_list *next; - struct devlink_param_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_param_get_list_free(struct devlink_param_get_list *rsp); - -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req); - -/* ============== DEVLINK_CMD_PARAM_SET ============== */ -/* DEVLINK_CMD_PARAM_SET - do */ -struct devlink_param_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - __u32 param_type:1; - __u32 param_value_cmode:1; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; - __u8 param_type; - enum devlink_param_cmode param_value_cmode; -}; - -static inline struct devlink_param_set_req *devlink_param_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_set_req)); -} -void devlink_param_set_req_free(struct devlink_param_set_req *req); - -static inline void -devlink_param_set_req_set_bus_name(struct devlink_param_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_set_req_set_dev_name(struct devlink_param_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_param_set_req_set_param_name(struct devlink_param_set_req *req, - const char *param_name) -{ - free(req->param_name); - req->_present.param_name_len = strlen(param_name); - req->param_name = malloc(req->_present.param_name_len + 1); - memcpy(req->param_name, param_name, req->_present.param_name_len); - req->param_name[req->_present.param_name_len] = 0; -} -static inline void -devlink_param_set_req_set_param_type(struct devlink_param_set_req *req, - __u8 param_type) -{ - req->_present.param_type = 1; - req->param_type = param_type; -} -static inline void -devlink_param_set_req_set_param_value_cmode(struct devlink_param_set_req *req, - enum devlink_param_cmode param_value_cmode) -{ - req->_present.param_value_cmode = 1; - req->param_value_cmode = param_value_cmode; -} - -/* - * Set param instances. - */ -int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req); - -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -struct devlink_region_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -static inline struct devlink_region_get_req *devlink_region_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_get_req)); -} -void devlink_region_get_req_free(struct devlink_region_get_req *req); - -static inline void -devlink_region_get_req_set_bus_name(struct devlink_region_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_get_req_set_dev_name(struct devlink_region_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_get_req_set_port_index(struct devlink_region_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_get_req_set_region_name(struct devlink_region_get_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} - -struct devlink_region_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp); - -/* - * Get region instances. - */ -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req); - -/* DEVLINK_CMD_REGION_GET - dump */ -struct devlink_region_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_region_get_req_dump * -devlink_region_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_get_req_dump)); -} -void devlink_region_get_req_dump_free(struct devlink_region_get_req_dump *req); - -static inline void -devlink_region_get_req_dump_set_bus_name(struct devlink_region_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_get_req_dump_set_dev_name(struct devlink_region_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_region_get_list { - struct devlink_region_get_list *next; - struct devlink_region_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_region_get_list_free(struct devlink_region_get_list *rsp); - -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req); - -/* ============== DEVLINK_CMD_REGION_NEW ============== */ -/* DEVLINK_CMD_REGION_NEW - do */ -struct devlink_region_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -static inline struct devlink_region_new_req *devlink_region_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_new_req)); -} -void devlink_region_new_req_free(struct devlink_region_new_req *req); - -static inline void -devlink_region_new_req_set_bus_name(struct devlink_region_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_new_req_set_dev_name(struct devlink_region_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_new_req_set_port_index(struct devlink_region_new_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_new_req_set_region_name(struct devlink_region_new_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_new_req_set_region_snapshot_id(struct devlink_region_new_req *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} - -struct devlink_region_new_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp); - -/* - * Create region snapshot. - */ -struct devlink_region_new_rsp * -devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req); - -/* ============== DEVLINK_CMD_REGION_DEL ============== */ -/* DEVLINK_CMD_REGION_DEL - do */ -struct devlink_region_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -static inline struct devlink_region_del_req *devlink_region_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_del_req)); -} -void devlink_region_del_req_free(struct devlink_region_del_req *req); - -static inline void -devlink_region_del_req_set_bus_name(struct devlink_region_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_del_req_set_dev_name(struct devlink_region_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_del_req_set_port_index(struct devlink_region_del_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_del_req_set_region_name(struct devlink_region_del_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_del_req_set_region_snapshot_id(struct devlink_region_del_req *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} - -/* - * Delete region snapshot. - */ -int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req); - -/* ============== DEVLINK_CMD_REGION_READ ============== */ -/* DEVLINK_CMD_REGION_READ - dump */ -struct devlink_region_read_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - __u32 region_direct:1; - __u32 region_chunk_addr:1; - __u32 region_chunk_len:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; - __u64 region_chunk_addr; - __u64 region_chunk_len; -}; - -static inline struct devlink_region_read_req_dump * -devlink_region_read_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_read_req_dump)); -} -void -devlink_region_read_req_dump_free(struct devlink_region_read_req_dump *req); - -static inline void -devlink_region_read_req_dump_set_bus_name(struct devlink_region_read_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_dev_name(struct devlink_region_read_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_port_index(struct devlink_region_read_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_read_req_dump_set_region_name(struct devlink_region_read_req_dump *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_region_snapshot_id(struct devlink_region_read_req_dump *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} -static inline void -devlink_region_read_req_dump_set_region_direct(struct devlink_region_read_req_dump *req) -{ - req->_present.region_direct = 1; -} -static inline void -devlink_region_read_req_dump_set_region_chunk_addr(struct devlink_region_read_req_dump *req, - __u64 region_chunk_addr) -{ - req->_present.region_chunk_addr = 1; - req->region_chunk_addr = region_chunk_addr; -} -static inline void -devlink_region_read_req_dump_set_region_chunk_len(struct devlink_region_read_req_dump *req, - __u64 region_chunk_len) -{ - req->_present.region_chunk_len = 1; - req->region_chunk_len = region_chunk_len; -} - -struct devlink_region_read_rsp_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -struct devlink_region_read_rsp_list { - struct devlink_region_read_rsp_list *next; - struct devlink_region_read_rsp_dump obj __attribute__((aligned(8))); -}; - -void -devlink_region_read_rsp_list_free(struct devlink_region_read_rsp_list *rsp); - -struct devlink_region_read_rsp_list * -devlink_region_read_dump(struct ynl_sock *ys, - struct devlink_region_read_req_dump *req); - -/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ -/* DEVLINK_CMD_PORT_PARAM_GET - do */ -struct devlink_port_param_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_param_get_req * -devlink_port_param_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_param_get_req)); -} -void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req); - -static inline void -devlink_port_param_get_req_set_bus_name(struct devlink_port_param_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_param_get_req_set_dev_name(struct devlink_port_param_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_param_get_req_set_port_index(struct devlink_port_param_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_port_param_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp); - -/* - * Get port param instances. - */ -struct devlink_port_param_get_rsp * -devlink_port_param_get(struct ynl_sock *ys, - struct devlink_port_param_get_req *req); - -/* DEVLINK_CMD_PORT_PARAM_GET - dump */ -struct devlink_port_param_get_list { - struct devlink_port_param_get_list *next; - struct devlink_port_param_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp); - -struct devlink_port_param_get_list * -devlink_port_param_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */ -/* DEVLINK_CMD_PORT_PARAM_SET - do */ -struct devlink_port_param_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_param_set_req * -devlink_port_param_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_param_set_req)); -} -void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req); - -static inline void -devlink_port_param_set_req_set_bus_name(struct devlink_port_param_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_param_set_req_set_dev_name(struct devlink_port_param_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_param_set_req_set_port_index(struct devlink_port_param_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -/* - * Set port param instances. - */ -int devlink_port_param_set(struct ynl_sock *ys, - struct devlink_port_param_set_req *req); - -/* ============== DEVLINK_CMD_INFO_GET ============== */ -/* DEVLINK_CMD_INFO_GET - do */ -struct devlink_info_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_info_get_req)); -} -void devlink_info_get_req_free(struct devlink_info_get_req *req); - -static inline void -devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_info_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 info_driver_name_len; - __u32 info_serial_number_len; - } _present; - - char *bus_name; - char *dev_name; - char *info_driver_name; - char *info_serial_number; - unsigned int n_info_version_fixed; - struct devlink_dl_info_version *info_version_fixed; - unsigned int n_info_version_running; - struct devlink_dl_info_version *info_version_running; - unsigned int n_info_version_stored; - struct devlink_dl_info_version *info_version_stored; -}; - -void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp); - -/* - * Get device information, like driver name, hardware and firmware versions etc. - */ -struct devlink_info_get_rsp * -devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req); - -/* DEVLINK_CMD_INFO_GET - dump */ -struct devlink_info_get_list { - struct devlink_info_get_list *next; - struct devlink_info_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_info_get_list_free(struct devlink_info_get_list *rsp); - -struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -struct devlink_health_reporter_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_get_req * -devlink_health_reporter_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_get_req)); -} -void -devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req); - -static inline void -devlink_health_reporter_get_req_set_bus_name(struct devlink_health_reporter_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_set_dev_name(struct devlink_health_reporter_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_set_port_index(struct devlink_health_reporter_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_get_req_set_health_reporter_name(struct devlink_health_reporter_get_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -struct devlink_health_reporter_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -void -devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp); - -/* - * Get health reporter instances. - */ -struct devlink_health_reporter_get_rsp * -devlink_health_reporter_get(struct ynl_sock *ys, - struct devlink_health_reporter_get_req *req); - -/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -struct devlink_health_reporter_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_health_reporter_get_req_dump * -devlink_health_reporter_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_get_req_dump)); -} -void -devlink_health_reporter_get_req_dump_free(struct devlink_health_reporter_get_req_dump *req); - -static inline void -devlink_health_reporter_get_req_dump_set_bus_name(struct devlink_health_reporter_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_dump_set_dev_name(struct devlink_health_reporter_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_dump_set_port_index(struct devlink_health_reporter_get_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_health_reporter_get_list { - struct devlink_health_reporter_get_list *next; - struct devlink_health_reporter_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp); - -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -struct devlink_health_reporter_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - __u32 health_reporter_graceful_period:1; - __u32 health_reporter_auto_recover:1; - __u32 health_reporter_auto_dump:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; - __u64 health_reporter_graceful_period; - __u8 health_reporter_auto_recover; - __u8 health_reporter_auto_dump; -}; - -static inline struct devlink_health_reporter_set_req * -devlink_health_reporter_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_set_req)); -} -void -devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req); - -static inline void -devlink_health_reporter_set_req_set_bus_name(struct devlink_health_reporter_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_dev_name(struct devlink_health_reporter_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_port_index(struct devlink_health_reporter_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_name(struct devlink_health_reporter_set_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_graceful_period(struct devlink_health_reporter_set_req *req, - __u64 health_reporter_graceful_period) -{ - req->_present.health_reporter_graceful_period = 1; - req->health_reporter_graceful_period = health_reporter_graceful_period; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_auto_recover(struct devlink_health_reporter_set_req *req, - __u8 health_reporter_auto_recover) -{ - req->_present.health_reporter_auto_recover = 1; - req->health_reporter_auto_recover = health_reporter_auto_recover; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_auto_dump(struct devlink_health_reporter_set_req *req, - __u8 health_reporter_auto_dump) -{ - req->_present.health_reporter_auto_dump = 1; - req->health_reporter_auto_dump = health_reporter_auto_dump; -} - -/* - * Set health reporter instances. - */ -int devlink_health_reporter_set(struct ynl_sock *ys, - struct devlink_health_reporter_set_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -struct devlink_health_reporter_recover_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_recover_req * -devlink_health_reporter_recover_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_recover_req)); -} -void -devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req); - -static inline void -devlink_health_reporter_recover_req_set_bus_name(struct devlink_health_reporter_recover_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_recover_req_set_dev_name(struct devlink_health_reporter_recover_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_recover_req_set_port_index(struct devlink_health_reporter_recover_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_recover_req_set_health_reporter_name(struct devlink_health_reporter_recover_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Recover health reporter instances. - */ -int devlink_health_reporter_recover(struct ynl_sock *ys, - struct devlink_health_reporter_recover_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -struct devlink_health_reporter_diagnose_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_diagnose_req * -devlink_health_reporter_diagnose_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_diagnose_req)); -} -void -devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req); - -static inline void -devlink_health_reporter_diagnose_req_set_bus_name(struct devlink_health_reporter_diagnose_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_diagnose_req_set_dev_name(struct devlink_health_reporter_diagnose_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_diagnose_req_set_port_index(struct devlink_health_reporter_diagnose_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_diagnose_req_set_health_reporter_name(struct devlink_health_reporter_diagnose_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Diagnose health reporter instances. - */ -int devlink_health_reporter_diagnose(struct ynl_sock *ys, - struct devlink_health_reporter_diagnose_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -struct devlink_health_reporter_dump_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_dump_get_req_dump * -devlink_health_reporter_dump_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_dump_get_req_dump)); -} -void -devlink_health_reporter_dump_get_req_dump_free(struct devlink_health_reporter_dump_get_req_dump *req); - -static inline void -devlink_health_reporter_dump_get_req_dump_set_bus_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_dev_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_port_index(struct devlink_health_reporter_dump_get_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_health_reporter_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -struct devlink_health_reporter_dump_get_rsp_dump { - struct { - __u32 fmsg:1; - } _present; - - struct devlink_dl_fmsg fmsg; -}; - -struct devlink_health_reporter_dump_get_rsp_list { - struct devlink_health_reporter_dump_get_rsp_list *next; - struct devlink_health_reporter_dump_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void -devlink_health_reporter_dump_get_rsp_list_free(struct devlink_health_reporter_dump_get_rsp_list *rsp); - -struct devlink_health_reporter_dump_get_rsp_list * -devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_dump_get_req_dump *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -struct devlink_health_reporter_dump_clear_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_dump_clear_req * -devlink_health_reporter_dump_clear_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_dump_clear_req)); -} -void -devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req); - -static inline void -devlink_health_reporter_dump_clear_req_set_bus_name(struct devlink_health_reporter_dump_clear_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_clear_req_set_dev_name(struct devlink_health_reporter_dump_clear_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_clear_req_set_port_index(struct devlink_health_reporter_dump_clear_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_dump_clear_req_set_health_reporter_name(struct devlink_health_reporter_dump_clear_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Clear dump of health reporter instances. - */ -int devlink_health_reporter_dump_clear(struct ynl_sock *ys, - struct devlink_health_reporter_dump_clear_req *req); - -/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ -/* DEVLINK_CMD_FLASH_UPDATE - do */ -struct devlink_flash_update_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 flash_update_file_name_len; - __u32 flash_update_component_len; - __u32 flash_update_overwrite_mask:1; - } _present; - - char *bus_name; - char *dev_name; - char *flash_update_file_name; - char *flash_update_component; - struct nla_bitfield32 flash_update_overwrite_mask; -}; - -static inline struct devlink_flash_update_req * -devlink_flash_update_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_flash_update_req)); -} -void devlink_flash_update_req_free(struct devlink_flash_update_req *req); - -static inline void -devlink_flash_update_req_set_bus_name(struct devlink_flash_update_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_dev_name(struct devlink_flash_update_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_file_name(struct devlink_flash_update_req *req, - const char *flash_update_file_name) -{ - free(req->flash_update_file_name); - req->_present.flash_update_file_name_len = strlen(flash_update_file_name); - req->flash_update_file_name = malloc(req->_present.flash_update_file_name_len + 1); - memcpy(req->flash_update_file_name, flash_update_file_name, req->_present.flash_update_file_name_len); - req->flash_update_file_name[req->_present.flash_update_file_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_component(struct devlink_flash_update_req *req, - const char *flash_update_component) -{ - free(req->flash_update_component); - req->_present.flash_update_component_len = strlen(flash_update_component); - req->flash_update_component = malloc(req->_present.flash_update_component_len + 1); - memcpy(req->flash_update_component, flash_update_component, req->_present.flash_update_component_len); - req->flash_update_component[req->_present.flash_update_component_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_overwrite_mask(struct devlink_flash_update_req *req, - struct nla_bitfield32 *flash_update_overwrite_mask) -{ - req->_present.flash_update_overwrite_mask = 1; - memcpy(&req->flash_update_overwrite_mask, flash_update_overwrite_mask, sizeof(struct nla_bitfield32)); -} - -/* - * Flash update devlink instances. - */ -int devlink_flash_update(struct ynl_sock *ys, - struct devlink_flash_update_req *req); - -/* ============== DEVLINK_CMD_TRAP_GET ============== */ -/* DEVLINK_CMD_TRAP_GET - do */ -struct devlink_trap_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; -}; - -static inline struct devlink_trap_get_req *devlink_trap_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_get_req)); -} -void devlink_trap_get_req_free(struct devlink_trap_get_req *req); - -static inline void -devlink_trap_get_req_set_bus_name(struct devlink_trap_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_get_req_set_dev_name(struct devlink_trap_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_get_req_set_trap_name(struct devlink_trap_get_req *req, - const char *trap_name) -{ - free(req->trap_name); - req->_present.trap_name_len = strlen(trap_name); - req->trap_name = malloc(req->_present.trap_name_len + 1); - memcpy(req->trap_name, trap_name, req->_present.trap_name_len); - req->trap_name[req->_present.trap_name_len] = 0; -} - -struct devlink_trap_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; -}; - -void devlink_trap_get_rsp_free(struct devlink_trap_get_rsp *rsp); - -/* - * Get trap instances. - */ -struct devlink_trap_get_rsp * -devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req); - -/* DEVLINK_CMD_TRAP_GET - dump */ -struct devlink_trap_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_get_req_dump * -devlink_trap_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_get_req_dump)); -} -void devlink_trap_get_req_dump_free(struct devlink_trap_get_req_dump *req); - -static inline void -devlink_trap_get_req_dump_set_bus_name(struct devlink_trap_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_get_req_dump_set_dev_name(struct devlink_trap_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_get_list { - struct devlink_trap_get_list *next; - struct devlink_trap_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_trap_get_list_free(struct devlink_trap_get_list *rsp); - -struct devlink_trap_get_list * -devlink_trap_get_dump(struct ynl_sock *ys, - struct devlink_trap_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_SET ============== */ -/* DEVLINK_CMD_TRAP_SET - do */ -struct devlink_trap_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - __u32 trap_action:1; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; - enum devlink_trap_action trap_action; -}; - -static inline struct devlink_trap_set_req *devlink_trap_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_set_req)); -} -void devlink_trap_set_req_free(struct devlink_trap_set_req *req); - -static inline void -devlink_trap_set_req_set_bus_name(struct devlink_trap_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_dev_name(struct devlink_trap_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_trap_name(struct devlink_trap_set_req *req, - const char *trap_name) -{ - free(req->trap_name); - req->_present.trap_name_len = strlen(trap_name); - req->trap_name = malloc(req->_present.trap_name_len + 1); - memcpy(req->trap_name, trap_name, req->_present.trap_name_len); - req->trap_name[req->_present.trap_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_trap_action(struct devlink_trap_set_req *req, - enum devlink_trap_action trap_action) -{ - req->_present.trap_action = 1; - req->trap_action = trap_action; -} - -/* - * Set trap instances. - */ -int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req); - -/* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_GET - do */ -struct devlink_trap_group_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; -}; - -static inline struct devlink_trap_group_get_req * -devlink_trap_group_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_get_req)); -} -void devlink_trap_group_get_req_free(struct devlink_trap_group_get_req *req); - -static inline void -devlink_trap_group_get_req_set_bus_name(struct devlink_trap_group_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_set_dev_name(struct devlink_trap_group_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_set_trap_group_name(struct devlink_trap_group_get_req *req, - const char *trap_group_name) -{ - free(req->trap_group_name); - req->_present.trap_group_name_len = strlen(trap_group_name); - req->trap_group_name = malloc(req->_present.trap_group_name_len + 1); - memcpy(req->trap_group_name, trap_group_name, req->_present.trap_group_name_len); - req->trap_group_name[req->_present.trap_group_name_len] = 0; -} - -struct devlink_trap_group_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; -}; - -void devlink_trap_group_get_rsp_free(struct devlink_trap_group_get_rsp *rsp); - -/* - * Get trap group instances. - */ -struct devlink_trap_group_get_rsp * -devlink_trap_group_get(struct ynl_sock *ys, - struct devlink_trap_group_get_req *req); - -/* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -struct devlink_trap_group_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_group_get_req_dump * -devlink_trap_group_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_get_req_dump)); -} -void -devlink_trap_group_get_req_dump_free(struct devlink_trap_group_get_req_dump *req); - -static inline void -devlink_trap_group_get_req_dump_set_bus_name(struct devlink_trap_group_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_dump_set_dev_name(struct devlink_trap_group_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_group_get_list { - struct devlink_trap_group_get_list *next; - struct devlink_trap_group_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_trap_group_get_list_free(struct devlink_trap_group_get_list *rsp); - -struct devlink_trap_group_get_list * -devlink_trap_group_get_dump(struct ynl_sock *ys, - struct devlink_trap_group_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_SET - do */ -struct devlink_trap_group_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - __u32 trap_action:1; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; - enum devlink_trap_action trap_action; - __u32 trap_policer_id; -}; - -static inline struct devlink_trap_group_set_req * -devlink_trap_group_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_set_req)); -} -void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req); - -static inline void -devlink_trap_group_set_req_set_bus_name(struct devlink_trap_group_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_dev_name(struct devlink_trap_group_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_trap_group_name(struct devlink_trap_group_set_req *req, - const char *trap_group_name) -{ - free(req->trap_group_name); - req->_present.trap_group_name_len = strlen(trap_group_name); - req->trap_group_name = malloc(req->_present.trap_group_name_len + 1); - memcpy(req->trap_group_name, trap_group_name, req->_present.trap_group_name_len); - req->trap_group_name[req->_present.trap_group_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_trap_action(struct devlink_trap_group_set_req *req, - enum devlink_trap_action trap_action) -{ - req->_present.trap_action = 1; - req->trap_action = trap_action; -} -static inline void -devlink_trap_group_set_req_set_trap_policer_id(struct devlink_trap_group_set_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} - -/* - * Set trap group instances. - */ -int devlink_trap_group_set(struct ynl_sock *ys, - struct devlink_trap_group_set_req *req); - -/* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_GET - do */ -struct devlink_trap_policer_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; -}; - -static inline struct devlink_trap_policer_get_req * -devlink_trap_policer_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_get_req)); -} -void -devlink_trap_policer_get_req_free(struct devlink_trap_policer_get_req *req); - -static inline void -devlink_trap_policer_get_req_set_bus_name(struct devlink_trap_policer_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_set_dev_name(struct devlink_trap_policer_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_set_trap_policer_id(struct devlink_trap_policer_get_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} - -struct devlink_trap_policer_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; -}; - -void -devlink_trap_policer_get_rsp_free(struct devlink_trap_policer_get_rsp *rsp); - -/* - * Get trap policer instances. - */ -struct devlink_trap_policer_get_rsp * -devlink_trap_policer_get(struct ynl_sock *ys, - struct devlink_trap_policer_get_req *req); - -/* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -struct devlink_trap_policer_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_policer_get_req_dump * -devlink_trap_policer_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_get_req_dump)); -} -void -devlink_trap_policer_get_req_dump_free(struct devlink_trap_policer_get_req_dump *req); - -static inline void -devlink_trap_policer_get_req_dump_set_bus_name(struct devlink_trap_policer_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_dump_set_dev_name(struct devlink_trap_policer_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_policer_get_list { - struct devlink_trap_policer_get_list *next; - struct devlink_trap_policer_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_trap_policer_get_list_free(struct devlink_trap_policer_get_list *rsp); - -struct devlink_trap_policer_get_list * -devlink_trap_policer_get_dump(struct ynl_sock *ys, - struct devlink_trap_policer_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_SET - do */ -struct devlink_trap_policer_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - __u32 trap_policer_rate:1; - __u32 trap_policer_burst:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; - __u64 trap_policer_rate; - __u64 trap_policer_burst; -}; - -static inline struct devlink_trap_policer_set_req * -devlink_trap_policer_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_set_req)); -} -void -devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req); - -static inline void -devlink_trap_policer_set_req_set_bus_name(struct devlink_trap_policer_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_set_req_set_dev_name(struct devlink_trap_policer_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_id(struct devlink_trap_policer_set_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_rate(struct devlink_trap_policer_set_req *req, - __u64 trap_policer_rate) -{ - req->_present.trap_policer_rate = 1; - req->trap_policer_rate = trap_policer_rate; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_burst(struct devlink_trap_policer_set_req *req, - __u64 trap_policer_burst) -{ - req->_present.trap_policer_burst = 1; - req->trap_policer_burst = trap_policer_burst; -} - -/* - * Get trap policer instances. - */ -int devlink_trap_policer_set(struct ynl_sock *ys, - struct devlink_trap_policer_set_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -struct devlink_health_reporter_test_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_test_req * -devlink_health_reporter_test_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_test_req)); -} -void -devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req); - -static inline void -devlink_health_reporter_test_req_set_bus_name(struct devlink_health_reporter_test_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_test_req_set_dev_name(struct devlink_health_reporter_test_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_test_req_set_port_index(struct devlink_health_reporter_test_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_test_req_set_health_reporter_name(struct devlink_health_reporter_test_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Test health reporter instances. - */ -int devlink_health_reporter_test(struct ynl_sock *ys, - struct devlink_health_reporter_test_req *req); - -/* ============== DEVLINK_CMD_RATE_GET ============== */ -/* DEVLINK_CMD_RATE_GET - do */ -struct devlink_rate_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *rate_node_name; -}; - -static inline struct devlink_rate_get_req *devlink_rate_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_get_req)); -} -void devlink_rate_get_req_free(struct devlink_rate_get_req *req); - -static inline void -devlink_rate_get_req_set_bus_name(struct devlink_rate_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_get_req_set_dev_name(struct devlink_rate_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_get_req_set_port_index(struct devlink_rate_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_rate_get_req_set_rate_node_name(struct devlink_rate_get_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} - -struct devlink_rate_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *rate_node_name; -}; - -void devlink_rate_get_rsp_free(struct devlink_rate_get_rsp *rsp); - -/* - * Get rate instances. - */ -struct devlink_rate_get_rsp * -devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req); - -/* DEVLINK_CMD_RATE_GET - dump */ -struct devlink_rate_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_rate_get_req_dump * -devlink_rate_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_get_req_dump)); -} -void devlink_rate_get_req_dump_free(struct devlink_rate_get_req_dump *req); - -static inline void -devlink_rate_get_req_dump_set_bus_name(struct devlink_rate_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_get_req_dump_set_dev_name(struct devlink_rate_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_rate_get_list { - struct devlink_rate_get_list *next; - struct devlink_rate_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_rate_get_list_free(struct devlink_rate_get_list *rsp); - -struct devlink_rate_get_list * -devlink_rate_get_dump(struct ynl_sock *ys, - struct devlink_rate_get_req_dump *req); - -/* ============== DEVLINK_CMD_RATE_SET ============== */ -/* DEVLINK_CMD_RATE_SET - do */ -struct devlink_rate_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - __u32 rate_tx_share:1; - __u32 rate_tx_max:1; - __u32 rate_tx_priority:1; - __u32 rate_tx_weight:1; - __u32 rate_parent_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; - __u64 rate_tx_share; - __u64 rate_tx_max; - __u32 rate_tx_priority; - __u32 rate_tx_weight; - char *rate_parent_node_name; -}; - -static inline struct devlink_rate_set_req *devlink_rate_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_set_req)); -} -void devlink_rate_set_req_free(struct devlink_rate_set_req *req); - -static inline void -devlink_rate_set_req_set_bus_name(struct devlink_rate_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_set_req_set_dev_name(struct devlink_rate_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_set_req_set_rate_node_name(struct devlink_rate_set_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} -static inline void -devlink_rate_set_req_set_rate_tx_share(struct devlink_rate_set_req *req, - __u64 rate_tx_share) -{ - req->_present.rate_tx_share = 1; - req->rate_tx_share = rate_tx_share; -} -static inline void -devlink_rate_set_req_set_rate_tx_max(struct devlink_rate_set_req *req, - __u64 rate_tx_max) -{ - req->_present.rate_tx_max = 1; - req->rate_tx_max = rate_tx_max; -} -static inline void -devlink_rate_set_req_set_rate_tx_priority(struct devlink_rate_set_req *req, - __u32 rate_tx_priority) -{ - req->_present.rate_tx_priority = 1; - req->rate_tx_priority = rate_tx_priority; -} -static inline void -devlink_rate_set_req_set_rate_tx_weight(struct devlink_rate_set_req *req, - __u32 rate_tx_weight) -{ - req->_present.rate_tx_weight = 1; - req->rate_tx_weight = rate_tx_weight; -} -static inline void -devlink_rate_set_req_set_rate_parent_node_name(struct devlink_rate_set_req *req, - const char *rate_parent_node_name) -{ - free(req->rate_parent_node_name); - req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); - req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); - memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); - req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 0; -} - -/* - * Set rate instances. - */ -int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req); - -/* ============== DEVLINK_CMD_RATE_NEW ============== */ -/* DEVLINK_CMD_RATE_NEW - do */ -struct devlink_rate_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - __u32 rate_tx_share:1; - __u32 rate_tx_max:1; - __u32 rate_tx_priority:1; - __u32 rate_tx_weight:1; - __u32 rate_parent_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; - __u64 rate_tx_share; - __u64 rate_tx_max; - __u32 rate_tx_priority; - __u32 rate_tx_weight; - char *rate_parent_node_name; -}; - -static inline struct devlink_rate_new_req *devlink_rate_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_new_req)); -} -void devlink_rate_new_req_free(struct devlink_rate_new_req *req); - -static inline void -devlink_rate_new_req_set_bus_name(struct devlink_rate_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_dev_name(struct devlink_rate_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_rate_node_name(struct devlink_rate_new_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_rate_tx_share(struct devlink_rate_new_req *req, - __u64 rate_tx_share) -{ - req->_present.rate_tx_share = 1; - req->rate_tx_share = rate_tx_share; -} -static inline void -devlink_rate_new_req_set_rate_tx_max(struct devlink_rate_new_req *req, - __u64 rate_tx_max) -{ - req->_present.rate_tx_max = 1; - req->rate_tx_max = rate_tx_max; -} -static inline void -devlink_rate_new_req_set_rate_tx_priority(struct devlink_rate_new_req *req, - __u32 rate_tx_priority) -{ - req->_present.rate_tx_priority = 1; - req->rate_tx_priority = rate_tx_priority; -} -static inline void -devlink_rate_new_req_set_rate_tx_weight(struct devlink_rate_new_req *req, - __u32 rate_tx_weight) -{ - req->_present.rate_tx_weight = 1; - req->rate_tx_weight = rate_tx_weight; -} -static inline void -devlink_rate_new_req_set_rate_parent_node_name(struct devlink_rate_new_req *req, - const char *rate_parent_node_name) -{ - free(req->rate_parent_node_name); - req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); - req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); - memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); - req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 0; -} - -/* - * Create rate instances. - */ -int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req); - -/* ============== DEVLINK_CMD_RATE_DEL ============== */ -/* DEVLINK_CMD_RATE_DEL - do */ -struct devlink_rate_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; -}; - -static inline struct devlink_rate_del_req *devlink_rate_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_del_req)); -} -void devlink_rate_del_req_free(struct devlink_rate_del_req *req); - -static inline void -devlink_rate_del_req_set_bus_name(struct devlink_rate_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_del_req_set_dev_name(struct devlink_rate_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_del_req_set_rate_node_name(struct devlink_rate_del_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} - -/* - * Delete rate instances. - */ -int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req); - -/* ============== DEVLINK_CMD_LINECARD_GET ============== */ -/* DEVLINK_CMD_LINECARD_GET - do */ -struct devlink_linecard_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; -}; - -static inline struct devlink_linecard_get_req * -devlink_linecard_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_get_req)); -} -void devlink_linecard_get_req_free(struct devlink_linecard_get_req *req); - -static inline void -devlink_linecard_get_req_set_bus_name(struct devlink_linecard_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_get_req_set_dev_name(struct devlink_linecard_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_linecard_get_req_set_linecard_index(struct devlink_linecard_get_req *req, - __u32 linecard_index) -{ - req->_present.linecard_index = 1; - req->linecard_index = linecard_index; -} - -struct devlink_linecard_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; -}; - -void devlink_linecard_get_rsp_free(struct devlink_linecard_get_rsp *rsp); - -/* - * Get line card instances. - */ -struct devlink_linecard_get_rsp * -devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req); - -/* DEVLINK_CMD_LINECARD_GET - dump */ -struct devlink_linecard_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_linecard_get_req_dump * -devlink_linecard_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_get_req_dump)); -} -void -devlink_linecard_get_req_dump_free(struct devlink_linecard_get_req_dump *req); - -static inline void -devlink_linecard_get_req_dump_set_bus_name(struct devlink_linecard_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_get_req_dump_set_dev_name(struct devlink_linecard_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_linecard_get_list { - struct devlink_linecard_get_list *next; - struct devlink_linecard_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_linecard_get_list_free(struct devlink_linecard_get_list *rsp); - -struct devlink_linecard_get_list * -devlink_linecard_get_dump(struct ynl_sock *ys, - struct devlink_linecard_get_req_dump *req); - -/* ============== DEVLINK_CMD_LINECARD_SET ============== */ -/* DEVLINK_CMD_LINECARD_SET - do */ -struct devlink_linecard_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - __u32 linecard_type_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; - char *linecard_type; -}; - -static inline struct devlink_linecard_set_req * -devlink_linecard_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_set_req)); -} -void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req); - -static inline void -devlink_linecard_set_req_set_bus_name(struct devlink_linecard_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_set_req_set_dev_name(struct devlink_linecard_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_linecard_set_req_set_linecard_index(struct devlink_linecard_set_req *req, - __u32 linecard_index) -{ - req->_present.linecard_index = 1; - req->linecard_index = linecard_index; -} -static inline void -devlink_linecard_set_req_set_linecard_type(struct devlink_linecard_set_req *req, - const char *linecard_type) -{ - free(req->linecard_type); - req->_present.linecard_type_len = strlen(linecard_type); - req->linecard_type = malloc(req->_present.linecard_type_len + 1); - memcpy(req->linecard_type, linecard_type, req->_present.linecard_type_len); - req->linecard_type[req->_present.linecard_type_len] = 0; -} - -/* - * Set line card instances. - */ -int devlink_linecard_set(struct ynl_sock *ys, - struct devlink_linecard_set_req *req); - -/* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ -/* DEVLINK_CMD_SELFTESTS_GET - do */ -struct devlink_selftests_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_selftests_get_req * -devlink_selftests_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_selftests_get_req)); -} -void devlink_selftests_get_req_free(struct devlink_selftests_get_req *req); - -static inline void -devlink_selftests_get_req_set_bus_name(struct devlink_selftests_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_selftests_get_req_set_dev_name(struct devlink_selftests_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_selftests_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -void devlink_selftests_get_rsp_free(struct devlink_selftests_get_rsp *rsp); - -/* - * Get device selftest instances. - */ -struct devlink_selftests_get_rsp * -devlink_selftests_get(struct ynl_sock *ys, - struct devlink_selftests_get_req *req); - -/* DEVLINK_CMD_SELFTESTS_GET - dump */ -struct devlink_selftests_get_list { - struct devlink_selftests_get_list *next; - struct devlink_selftests_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_selftests_get_list_free(struct devlink_selftests_get_list *rsp); - -struct devlink_selftests_get_list * -devlink_selftests_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ -/* DEVLINK_CMD_SELFTESTS_RUN - do */ -struct devlink_selftests_run_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 selftests:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_selftest_id selftests; -}; - -static inline struct devlink_selftests_run_req * -devlink_selftests_run_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_selftests_run_req)); -} -void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req); - -static inline void -devlink_selftests_run_req_set_bus_name(struct devlink_selftests_run_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_selftests_run_req_set_dev_name(struct devlink_selftests_run_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_selftests_run_req_set_selftests_flash(struct devlink_selftests_run_req *req) -{ - req->_present.selftests = 1; - req->selftests._present.flash = 1; -} - -/* - * Run device selftest instances. - */ -int devlink_selftests_run(struct ynl_sock *ys, - struct devlink_selftests_run_req *req); - -#endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c deleted file mode 100644 index 660435639e2b..000000000000 --- a/tools/net/ynl/generated/ethtool-user.c +++ /dev/null @@ -1,6370 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/ethtool.yaml */ -/* YNL-GEN user source */ -/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */ - -#include -#include -#include "ethtool-user.h" -#include "ynl.h" -#include - -#include -#include - -#include "linux/ethtool_netlink.h" - -/* Enums */ -static const char * const ethtool_op_strmap[] = { - [ETHTOOL_MSG_STRSET_GET] = "strset-get", - [ETHTOOL_MSG_LINKINFO_GET] = "linkinfo-get", - [3] = "linkinfo-ntf", - [ETHTOOL_MSG_LINKMODES_GET] = "linkmodes-get", - [5] = "linkmodes-ntf", - [ETHTOOL_MSG_LINKSTATE_GET] = "linkstate-get", - [ETHTOOL_MSG_DEBUG_GET] = "debug-get", - [8] = "debug-ntf", - [ETHTOOL_MSG_WOL_GET] = "wol-get", - [10] = "wol-ntf", - [ETHTOOL_MSG_FEATURES_GET] = "features-get", - [ETHTOOL_MSG_FEATURES_SET] = "features-set", - [13] = "features-ntf", - [14] = "privflags-get", - [15] = "privflags-ntf", - [16] = "rings-get", - [17] = "rings-ntf", - [18] = "channels-get", - [19] = "channels-ntf", - [20] = "coalesce-get", - [21] = "coalesce-ntf", - [22] = "pause-get", - [23] = "pause-ntf", - [24] = "eee-get", - [25] = "eee-ntf", - [26] = "tsinfo-get", - [27] = "cable-test-ntf", - [28] = "cable-test-tdr-ntf", - [29] = "tunnel-info-get", - [30] = "fec-get", - [31] = "fec-ntf", - [32] = "module-eeprom-get", - [34] = "phc-vclocks-get", - [35] = "module-get", - [36] = "module-ntf", - [37] = "pse-get", - [ETHTOOL_MSG_RSS_GET] = "rss-get", - [ETHTOOL_MSG_PLCA_GET_CFG] = "plca-get-cfg", - [40] = "plca-get-status", - [41] = "plca-ntf", - [ETHTOOL_MSG_MM_GET] = "mm-get", - [43] = "mm-ntf", -}; - -const char *ethtool_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(ethtool_op_strmap)) - return NULL; - return ethtool_op_strmap[op]; -} - -static const char * const ethtool_udp_tunnel_type_strmap[] = { - [0] = "vxlan", - [1] = "geneve", - [2] = "vxlan-gpe", -}; - -const char *ethtool_udp_tunnel_type_str(int value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_udp_tunnel_type_strmap)) - return NULL; - return ethtool_udp_tunnel_type_strmap[value]; -} - -static const char * const ethtool_stringset_strmap[] = { -}; - -const char *ethtool_stringset_str(enum ethtool_stringset value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_stringset_strmap)) - return NULL; - return ethtool_stringset_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr ethtool_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .name = "dev-index", .type = YNL_PT_U32, }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, - [ETHTOOL_A_HEADER_FLAGS] = { .name = "flags", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_header_nest = { - .max_attr = ETHTOOL_A_HEADER_MAX, - .table = ethtool_header_policy, -}; - -struct ynl_policy_attr ethtool_pause_stat_policy[ETHTOOL_A_PAUSE_STAT_MAX + 1] = { - [ETHTOOL_A_PAUSE_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_PAUSE_STAT_TX_FRAMES] = { .name = "tx-frames", .type = YNL_PT_U64, }, - [ETHTOOL_A_PAUSE_STAT_RX_FRAMES] = { .name = "rx-frames", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest ethtool_pause_stat_nest = { - .max_attr = ETHTOOL_A_PAUSE_STAT_MAX, - .table = ethtool_pause_stat_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .name = "first", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .name = "last", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP] = { .name = "step", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_cfg_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, - .table = ethtool_cable_test_tdr_cfg_policy, -}; - -struct ynl_policy_attr ethtool_fec_stat_policy[ETHTOOL_A_FEC_STAT_MAX + 1] = { - [ETHTOOL_A_FEC_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_FEC_STAT_CORRECTED] = { .name = "corrected", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_FEC_STAT_UNCORR] = { .name = "uncorr", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_FEC_STAT_CORR_BITS] = { .name = "corr-bits", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_fec_stat_nest = { - .max_attr = ETHTOOL_A_FEC_STAT_MAX, - .table = ethtool_fec_stat_policy, -}; - -struct ynl_policy_attr ethtool_mm_stat_policy[ETHTOOL_A_MM_STAT_MAX + 1] = { - [ETHTOOL_A_MM_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS] = { .name = "reassembly-errors", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_SMD_ERRORS] = { .name = "smd-errors", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_REASSEMBLY_OK] = { .name = "reassembly-ok", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_RX_FRAG_COUNT] = { .name = "rx-frag-count", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_TX_FRAG_COUNT] = { .name = "tx-frag-count", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_HOLD_COUNT] = { .name = "hold-count", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest ethtool_mm_stat_nest = { - .max_attr = ETHTOOL_A_MM_STAT_MAX, - .table = ethtool_mm_stat_policy, -}; - -struct ynl_policy_attr ethtool_cable_result_policy[ETHTOOL_A_CABLE_RESULT_MAX + 1] = { - [ETHTOOL_A_CABLE_RESULT_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_RESULT_CODE] = { .name = "code", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_cable_result_nest = { - .max_attr = ETHTOOL_A_CABLE_RESULT_MAX, - .table = ethtool_cable_result_policy, -}; - -struct ynl_policy_attr ethtool_cable_fault_length_policy[ETHTOOL_A_CABLE_FAULT_LENGTH_MAX + 1] = { - [ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_FAULT_LENGTH_CM] = { .name = "cm", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_cable_fault_length_nest = { - .max_attr = ETHTOOL_A_CABLE_FAULT_LENGTH_MAX, - .table = ethtool_cable_fault_length_policy, -}; - -struct ynl_policy_attr ethtool_bitset_bit_policy[ETHTOOL_A_BITSET_BIT_MAX + 1] = { - [ETHTOOL_A_BITSET_BIT_INDEX] = { .name = "index", .type = YNL_PT_U32, }, - [ETHTOOL_A_BITSET_BIT_NAME] = { .name = "name", .type = YNL_PT_NUL_STR, }, - [ETHTOOL_A_BITSET_BIT_VALUE] = { .name = "value", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest ethtool_bitset_bit_nest = { - .max_attr = ETHTOOL_A_BITSET_BIT_MAX, - .table = ethtool_bitset_bit_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_entry_policy[ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT] = { .name = "port", .type = YNL_PT_U16, }, - [ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE] = { .name = "type", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_entry_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX, - .table = ethtool_tunnel_udp_entry_policy, -}; - -struct ynl_policy_attr ethtool_string_policy[ETHTOOL_A_STRING_MAX + 1] = { - [ETHTOOL_A_STRING_INDEX] = { .name = "index", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRING_VALUE] = { .name = "value", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest ethtool_string_nest = { - .max_attr = ETHTOOL_A_STRING_MAX, - .table = ethtool_string_policy, -}; - -struct ynl_policy_attr ethtool_cable_nest_policy[ETHTOOL_A_CABLE_NEST_MAX + 1] = { - [ETHTOOL_A_CABLE_NEST_RESULT] = { .name = "result", .type = YNL_PT_NEST, .nest = ðtool_cable_result_nest, }, - [ETHTOOL_A_CABLE_NEST_FAULT_LENGTH] = { .name = "fault-length", .type = YNL_PT_NEST, .nest = ðtool_cable_fault_length_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_nest_nest = { - .max_attr = ETHTOOL_A_CABLE_NEST_MAX, - .table = ethtool_cable_nest_policy, -}; - -struct ynl_policy_attr ethtool_bitset_bits_policy[ETHTOOL_A_BITSET_BITS_MAX + 1] = { - [ETHTOOL_A_BITSET_BITS_BIT] = { .name = "bit", .type = YNL_PT_NEST, .nest = ðtool_bitset_bit_nest, }, -}; - -struct ynl_policy_nest ethtool_bitset_bits_nest = { - .max_attr = ETHTOOL_A_BITSET_BITS_MAX, - .table = ethtool_bitset_bits_policy, -}; - -struct ynl_policy_attr ethtool_strings_policy[ETHTOOL_A_STRINGS_MAX + 1] = { - [ETHTOOL_A_STRINGS_STRING] = { .name = "string", .type = YNL_PT_NEST, .nest = ðtool_string_nest, }, -}; - -struct ynl_policy_nest ethtool_strings_nest = { - .max_attr = ETHTOOL_A_STRINGS_MAX, - .table = ethtool_strings_policy, -}; - -struct ynl_policy_attr ethtool_bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = { - [ETHTOOL_A_BITSET_NOMASK] = { .name = "nomask", .type = YNL_PT_FLAG, }, - [ETHTOOL_A_BITSET_SIZE] = { .name = "size", .type = YNL_PT_U32, }, - [ETHTOOL_A_BITSET_BITS] = { .name = "bits", .type = YNL_PT_NEST, .nest = ðtool_bitset_bits_nest, }, -}; - -struct ynl_policy_nest ethtool_bitset_nest = { - .max_attr = ETHTOOL_A_BITSET_MAX, - .table = ethtool_bitset_policy, -}; - -struct ynl_policy_attr ethtool_stringset_policy[ETHTOOL_A_STRINGSET_MAX + 1] = { - [ETHTOOL_A_STRINGSET_ID] = { .name = "id", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRINGSET_COUNT] = { .name = "count", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRINGSET_STRINGS] = { .name = "strings", .type = YNL_PT_NEST, .nest = ðtool_strings_nest, }, -}; - -struct ynl_policy_nest ethtool_stringset_nest = { - .max_attr = ETHTOOL_A_STRINGSET_MAX, - .table = ethtool_stringset_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_table_policy[ETHTOOL_A_TUNNEL_UDP_TABLE_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE] = { .name = "size", .type = YNL_PT_U32, }, - [ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES] = { .name = "types", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY] = { .name = "entry", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_entry_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_table_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_TABLE_MAX, - .table = ethtool_tunnel_udp_table_policy, -}; - -struct ynl_policy_attr ethtool_stringsets_policy[ETHTOOL_A_STRINGSETS_MAX + 1] = { - [ETHTOOL_A_STRINGSETS_STRINGSET] = { .name = "stringset", .type = YNL_PT_NEST, .nest = ðtool_stringset_nest, }, -}; - -struct ynl_policy_nest ethtool_stringsets_nest = { - .max_attr = ETHTOOL_A_STRINGSETS_MAX, - .table = ethtool_stringsets_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_policy[ETHTOOL_A_TUNNEL_UDP_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_TABLE] = { .name = "table", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_table_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_MAX, - .table = ethtool_tunnel_udp_policy, -}; - -struct ynl_policy_attr ethtool_strset_policy[ETHTOOL_A_STRSET_MAX + 1] = { - [ETHTOOL_A_STRSET_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_STRSET_STRINGSETS] = { .name = "stringsets", .type = YNL_PT_NEST, .nest = ðtool_stringsets_nest, }, - [ETHTOOL_A_STRSET_COUNTS_ONLY] = { .name = "counts-only", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest ethtool_strset_nest = { - .max_attr = ETHTOOL_A_STRSET_MAX, - .table = ethtool_strset_policy, -}; - -struct ynl_policy_attr ethtool_linkinfo_policy[ETHTOOL_A_LINKINFO_MAX + 1] = { - [ETHTOOL_A_LINKINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKINFO_PORT] = { .name = "port", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_PHYADDR] = { .name = "phyaddr", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TP_MDIX] = { .name = "tp-mdix", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .name = "tp-mdix-ctrl", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .name = "transceiver", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_linkinfo_nest = { - .max_attr = ETHTOOL_A_LINKINFO_MAX, - .table = ethtool_linkinfo_policy, -}; - -struct ynl_policy_attr ethtool_linkmodes_policy[ETHTOOL_A_LINKMODES_MAX + 1] = { - [ETHTOOL_A_LINKMODES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKMODES_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_OURS] = { .name = "ours", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_LINKMODES_PEER] = { .name = "peer", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_LINKMODES_SPEED] = { .name = "speed", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKMODES_DUPLEX] = { .name = "duplex", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .name = "master-slave-cfg", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE] = { .name = "master-slave-state", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_LANES] = { .name = "lanes", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKMODES_RATE_MATCHING] = { .name = "rate-matching", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_linkmodes_nest = { - .max_attr = ETHTOOL_A_LINKMODES_MAX, - .table = ethtool_linkmodes_policy, -}; - -struct ynl_policy_attr ethtool_linkstate_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { - [ETHTOOL_A_LINKSTATE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKSTATE_LINK] = { .name = "link", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_SQI] = { .name = "sqi", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .name = "sqi-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .name = "ext-state", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .name = "ext-substate", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT] = { .name = "ext-down-cnt", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_linkstate_nest = { - .max_attr = ETHTOOL_A_LINKSTATE_MAX, - .table = ethtool_linkstate_policy, -}; - -struct ynl_policy_attr ethtool_debug_policy[ETHTOOL_A_DEBUG_MAX + 1] = { - [ETHTOOL_A_DEBUG_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_DEBUG_MSGMASK] = { .name = "msgmask", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_debug_nest = { - .max_attr = ETHTOOL_A_DEBUG_MAX, - .table = ethtool_debug_policy, -}; - -struct ynl_policy_attr ethtool_wol_policy[ETHTOOL_A_WOL_MAX + 1] = { - [ETHTOOL_A_WOL_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_WOL_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_WOL_SOPASS] = { .name = "sopass", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_wol_nest = { - .max_attr = ETHTOOL_A_WOL_MAX, - .table = ethtool_wol_policy, -}; - -struct ynl_policy_attr ethtool_features_policy[ETHTOOL_A_FEATURES_MAX + 1] = { - [ETHTOOL_A_FEATURES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_FEATURES_HW] = { .name = "hw", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_WANTED] = { .name = "wanted", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_ACTIVE] = { .name = "active", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_NOCHANGE] = { .name = "nochange", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_features_nest = { - .max_attr = ETHTOOL_A_FEATURES_MAX, - .table = ethtool_features_policy, -}; - -struct ynl_policy_attr ethtool_privflags_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { - [ETHTOOL_A_PRIVFLAGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .name = "flags", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_privflags_nest = { - .max_attr = ETHTOOL_A_PRIVFLAGS_MAX, - .table = ethtool_privflags_policy, -}; - -struct ynl_policy_attr ethtool_rings_policy[ETHTOOL_A_RINGS_MAX + 1] = { - [ETHTOOL_A_RINGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_RINGS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .name = "rx-mini-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .name = "rx-jumbo-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX] = { .name = "rx", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_MINI] = { .name = "rx-mini", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_JUMBO] = { .name = "rx-jumbo", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX] = { .name = "tx", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_BUF_LEN] = { .name = "rx-buf-len", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = { .name = "tcp-data-split", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_CQE_SIZE] = { .name = "cqe-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_PUSH] = { .name = "tx-push", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_RX_PUSH] = { .name = "rx-push", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .name = "tx-push-buf-len", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX] = { .name = "tx-push-buf-len-max", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_rings_nest = { - .max_attr = ETHTOOL_A_RINGS_MAX, - .table = ethtool_rings_policy, -}; - -struct ynl_policy_attr ethtool_channels_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { - [ETHTOOL_A_CHANNELS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CHANNELS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .name = "other-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .name = "combined-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_RX_COUNT] = { .name = "rx-count", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_TX_COUNT] = { .name = "tx-count", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .name = "other-count", .type = YNL_PT_U32, }, - [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .name = "combined-count", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_channels_nest = { - .max_attr = ETHTOOL_A_CHANNELS_MAX, - .table = ethtool_channels_policy, -}; - -struct ynl_policy_attr ethtool_coalesce_policy[ETHTOOL_A_COALESCE_MAX + 1] = { - [ETHTOOL_A_COALESCE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_COALESCE_RX_USECS] = { .name = "rx-usecs", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_MAX_FRAMES] = { .name = "rx-max-frames", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_USECS_IRQ] = { .name = "rx-usecs-irq", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ] = { .name = "rx-max-frames-irq", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_USECS] = { .name = "tx-usecs", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_MAX_FRAMES] = { .name = "tx-max-frames", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_USECS_IRQ] = { .name = "tx-usecs-irq", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ] = { .name = "tx-max-frames-irq", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_STATS_BLOCK_USECS] = { .name = "stats-block-usecs", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX] = { .name = "use-adaptive-rx", .type = YNL_PT_U8, }, - [ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX] = { .name = "use-adaptive-tx", .type = YNL_PT_U8, }, - [ETHTOOL_A_COALESCE_PKT_RATE_LOW] = { .name = "pkt-rate-low", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_USECS_LOW] = { .name = "rx-usecs-low", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW] = { .name = "rx-max-frames-low", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_USECS_LOW] = { .name = "tx-usecs-low", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW] = { .name = "tx-max-frames-low", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_PKT_RATE_HIGH] = { .name = "pkt-rate-high", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_USECS_HIGH] = { .name = "rx-usecs-high", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH] = { .name = "rx-max-frames-high", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .name = "tx-usecs-high", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .name = "tx-max-frames-high", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .name = "rate-sample-interval", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = { .name = "use-cqe-mode-tx", .type = YNL_PT_U8, }, - [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = { .name = "use-cqe-mode-rx", .type = YNL_PT_U8, }, - [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .name = "tx-aggr-max-bytes", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .name = "tx-aggr-max-frames", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .name = "tx-aggr-time-usecs", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_coalesce_nest = { - .max_attr = ETHTOOL_A_COALESCE_MAX, - .table = ethtool_coalesce_policy, -}; - -struct ynl_policy_attr ethtool_pause_policy[ETHTOOL_A_PAUSE_MAX + 1] = { - [ETHTOOL_A_PAUSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PAUSE_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_RX] = { .name = "rx", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_TX] = { .name = "tx", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_pause_stat_nest, }, - [ETHTOOL_A_PAUSE_STATS_SRC] = { .name = "stats-src", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_pause_nest = { - .max_attr = ETHTOOL_A_PAUSE_MAX, - .table = ethtool_pause_policy, -}; - -struct ynl_policy_attr ethtool_eee_policy[ETHTOOL_A_EEE_MAX + 1] = { - [ETHTOOL_A_EEE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_EEE_MODES_OURS] = { .name = "modes-ours", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_EEE_MODES_PEER] = { .name = "modes-peer", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_EEE_ACTIVE] = { .name = "active", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_TX_LPI_ENABLED] = { .name = "tx-lpi-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_TX_LPI_TIMER] = { .name = "tx-lpi-timer", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_eee_nest = { - .max_attr = ETHTOOL_A_EEE_MAX, - .table = ethtool_eee_policy, -}; - -struct ynl_policy_attr ethtool_tsinfo_policy[ETHTOOL_A_TSINFO_MAX + 1] = { - [ETHTOOL_A_TSINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_TSINFO_TIMESTAMPING] = { .name = "timestamping", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_TX_TYPES] = { .name = "tx-types", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_RX_FILTERS] = { .name = "rx-filters", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_PHC_INDEX] = { .name = "phc-index", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_tsinfo_nest = { - .max_attr = ETHTOOL_A_TSINFO_MAX, - .table = ethtool_tsinfo_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_MAX, - .table = ethtool_cable_test_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_ntf_policy[ETHTOOL_A_CABLE_TEST_NTF_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CABLE_TEST_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_TEST_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = ðtool_cable_nest_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_ntf_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_NTF_MAX, - .table = ethtool_cable_test_ntf_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .name = "cfg", .type = YNL_PT_NEST, .nest = ðtool_cable_test_tdr_cfg_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_MAX, - .table = ethtool_cable_test_tdr_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_ntf_policy[ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = ðtool_cable_nest_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_ntf_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX, - .table = ethtool_cable_test_tdr_ntf_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = { - [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_TUNNEL_INFO_UDP_PORTS] = { .name = "udp-ports", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_info_nest = { - .max_attr = ETHTOOL_A_TUNNEL_INFO_MAX, - .table = ethtool_tunnel_info_policy, -}; - -struct ynl_policy_attr ethtool_fec_policy[ETHTOOL_A_FEC_MAX + 1] = { - [ETHTOOL_A_FEC_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_FEC_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEC_AUTO] = { .name = "auto", .type = YNL_PT_U8, }, - [ETHTOOL_A_FEC_ACTIVE] = { .name = "active", .type = YNL_PT_U32, }, - [ETHTOOL_A_FEC_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_fec_stat_nest, }, -}; - -struct ynl_policy_nest ethtool_fec_nest = { - .max_attr = ETHTOOL_A_FEC_MAX, - .table = ethtool_fec_policy, -}; - -struct ynl_policy_attr ethtool_module_eeprom_policy[ETHTOOL_A_MODULE_EEPROM_MAX + 1] = { - [ETHTOOL_A_MODULE_EEPROM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .name = "offset", .type = YNL_PT_U32, }, - [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .name = "length", .type = YNL_PT_U32, }, - [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .name = "page", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_BANK] = { .name = "bank", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = { .name = "i2c-address", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_DATA] = { .name = "data", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_module_eeprom_nest = { - .max_attr = ETHTOOL_A_MODULE_EEPROM_MAX, - .table = ethtool_module_eeprom_policy, -}; - -struct ynl_policy_attr ethtool_phc_vclocks_policy[ETHTOOL_A_PHC_VCLOCKS_MAX + 1] = { - [ETHTOOL_A_PHC_VCLOCKS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PHC_VCLOCKS_NUM] = { .name = "num", .type = YNL_PT_U32, }, - [ETHTOOL_A_PHC_VCLOCKS_INDEX] = { .name = "index", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_phc_vclocks_nest = { - .max_attr = ETHTOOL_A_PHC_VCLOCKS_MAX, - .table = ethtool_phc_vclocks_policy, -}; - -struct ynl_policy_attr ethtool_module_policy[ETHTOOL_A_MODULE_MAX + 1] = { - [ETHTOOL_A_MODULE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MODULE_POWER_MODE_POLICY] = { .name = "power-mode-policy", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_POWER_MODE] = { .name = "power-mode", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_module_nest = { - .max_attr = ETHTOOL_A_MODULE_MAX, - .table = ethtool_module_policy, -}; - -struct ynl_policy_attr ethtool_pse_policy[ETHTOOL_A_PSE_MAX + 1] = { - [ETHTOOL_A_PSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PODL_PSE_ADMIN_STATE] = { .name = "admin-state", .type = YNL_PT_U32, }, - [ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] = { .name = "admin-control", .type = YNL_PT_U32, }, - [ETHTOOL_A_PODL_PSE_PW_D_STATUS] = { .name = "pw-d-status", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_pse_nest = { - .max_attr = ETHTOOL_A_PSE_MAX, - .table = ethtool_pse_policy, -}; - -struct ynl_policy_attr ethtool_rss_policy[ETHTOOL_A_RSS_MAX + 1] = { - [ETHTOOL_A_RSS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_RSS_CONTEXT] = { .name = "context", .type = YNL_PT_U32, }, - [ETHTOOL_A_RSS_HFUNC] = { .name = "hfunc", .type = YNL_PT_U32, }, - [ETHTOOL_A_RSS_INDIR] = { .name = "indir", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_RSS_HKEY] = { .name = "hkey", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_rss_nest = { - .max_attr = ETHTOOL_A_RSS_MAX, - .table = ethtool_rss_policy, -}; - -struct ynl_policy_attr ethtool_plca_policy[ETHTOOL_A_PLCA_MAX + 1] = { - [ETHTOOL_A_PLCA_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PLCA_VERSION] = { .name = "version", .type = YNL_PT_U16, }, - [ETHTOOL_A_PLCA_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_PLCA_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_PLCA_NODE_CNT] = { .name = "node-cnt", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_NODE_ID] = { .name = "node-id", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_TO_TMR] = { .name = "to-tmr", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_BURST_CNT] = { .name = "burst-cnt", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_BURST_TMR] = { .name = "burst-tmr", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_plca_nest = { - .max_attr = ETHTOOL_A_PLCA_MAX, - .table = ethtool_plca_policy, -}; - -struct ynl_policy_attr ethtool_mm_policy[ETHTOOL_A_MM_MAX + 1] = { - [ETHTOOL_A_MM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MM_PMAC_ENABLED] = { .name = "pmac-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_ENABLED] = { .name = "tx-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_ACTIVE] = { .name = "tx-active", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = { .name = "tx-min-frag-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_RX_MIN_FRAG_SIZE] = { .name = "rx-min-frag-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_VERIFY_ENABLED] = { .name = "verify-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_VERIFY_STATUS] = { .name = "verify-status", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_VERIFY_TIME] = { .name = "verify-time", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_MAX_VERIFY_TIME] = { .name = "max-verify-time", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_mm_stat_nest, }, -}; - -struct ynl_policy_nest ethtool_mm_nest = { - .max_attr = ETHTOOL_A_MM_MAX, - .table = ethtool_mm_policy, -}; - -/* Common nested types */ -void ethtool_header_free(struct ethtool_header *obj) -{ - free(obj->dev_name); -} - -int ethtool_header_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_header *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.dev_index) - mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_DEV_INDEX, obj->dev_index); - if (obj->_present.dev_name_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_HEADER_DEV_NAME, obj->dev_name); - if (obj->_present.flags) - mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_FLAGS, obj->flags); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_header_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_header *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_HEADER_DEV_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dev_index = 1; - dst->dev_index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_HEADER_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == ETHTOOL_A_HEADER_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - dst->flags = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_pause_stat_free(struct ethtool_pause_stat *obj) -{ -} - -int ethtool_pause_stat_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_pause_stat *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.tx_frames) - mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, obj->tx_frames); - if (obj->_present.rx_frames) - mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, obj->rx_frames); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_pause_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_pause_stat *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PAUSE_STAT_TX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_frames = 1; - dst->tx_frames = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_PAUSE_STAT_RX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_frames = 1; - dst->rx_frames = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void ethtool_cable_test_tdr_cfg_free(struct ethtool_cable_test_tdr_cfg *obj) -{ -} - -void ethtool_fec_stat_free(struct ethtool_fec_stat *obj) -{ - free(obj->corrected); - free(obj->uncorr); - free(obj->corr_bits); -} - -int ethtool_fec_stat_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_fec_stat *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.corrected_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORRECTED, obj->_present.corrected_len, obj->corrected); - if (obj->_present.uncorr_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_UNCORR, obj->_present.uncorr_len, obj->uncorr); - if (obj->_present.corr_bits_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORR_BITS, obj->_present.corr_bits_len, obj->corr_bits); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_fec_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_fec_stat *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEC_STAT_CORRECTED) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.corrected_len = len; - dst->corrected = malloc(len); - memcpy(dst->corrected, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_FEC_STAT_UNCORR) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.uncorr_len = len; - dst->uncorr = malloc(len); - memcpy(dst->uncorr, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_FEC_STAT_CORR_BITS) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.corr_bits_len = len; - dst->corr_bits = malloc(len); - memcpy(dst->corr_bits, mnl_attr_get_payload(attr), len); - } - } - - return 0; -} - -void ethtool_mm_stat_free(struct ethtool_mm_stat *obj) -{ -} - -int ethtool_mm_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_mm_stat *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reassembly_errors = 1; - dst->reassembly_errors = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_SMD_ERRORS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.smd_errors = 1; - dst->smd_errors = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_OK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reassembly_ok = 1; - dst->reassembly_ok = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_frag_count = 1; - dst->rx_frag_count = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_frag_count = 1; - dst->tx_frag_count = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_HOLD_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hold_count = 1; - dst->hold_count = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void ethtool_cable_result_free(struct ethtool_cable_result *obj) -{ -} - -int ethtool_cable_result_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_result *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_RESULT_PAIR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pair = 1; - dst->pair = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_RESULT_CODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.code = 1; - dst->code = mnl_attr_get_u8(attr); - } - } - - return 0; -} - -void ethtool_cable_fault_length_free(struct ethtool_cable_fault_length *obj) -{ -} - -int ethtool_cable_fault_length_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_fault_length *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pair = 1; - dst->pair = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_CM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cm = 1; - dst->cm = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_bitset_bit_free(struct ethtool_bitset_bit *obj) -{ - free(obj->name); -} - -int ethtool_bitset_bit_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset_bit *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.index) - mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_BIT_INDEX, obj->index); - if (obj->_present.name_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_BITSET_BIT_NAME, obj->name); - if (obj->_present.value) - mnl_attr_put(nlh, ETHTOOL_A_BITSET_BIT_VALUE, 0, NULL); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_bit_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset_bit *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_BIT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.index = 1; - dst->index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_BITSET_BIT_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.name_len = len; - dst->name = malloc(len + 1); - memcpy(dst->name, mnl_attr_get_str(attr), len); - dst->name[len] = 0; - } else if (type == ETHTOOL_A_BITSET_BIT_VALUE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.value = 1; - } - } - - return 0; -} - -void ethtool_tunnel_udp_entry_free(struct ethtool_tunnel_udp_entry *obj) -{ -} - -int ethtool_tunnel_udp_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp_entry *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_string_free(struct ethtool_string *obj) -{ - free(obj->value); -} - -int ethtool_string_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_string *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.index) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRING_INDEX, obj->index); - if (obj->_present.value_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_STRING_VALUE, obj->value); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_string_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_string *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRING_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.index = 1; - dst->index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRING_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.value_len = len; - dst->value = malloc(len + 1); - memcpy(dst->value, mnl_attr_get_str(attr), len); - dst->value[len] = 0; - } - } - - return 0; -} - -void ethtool_cable_nest_free(struct ethtool_cable_nest *obj) -{ - ethtool_cable_result_free(&obj->result); - ethtool_cable_fault_length_free(&obj->fault_length); -} - -int ethtool_cable_nest_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_nest *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_NEST_RESULT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.result = 1; - - parg.rsp_policy = ðtool_cable_result_nest; - parg.data = &dst->result; - if (ethtool_cable_result_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_NEST_FAULT_LENGTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fault_length = 1; - - parg.rsp_policy = ðtool_cable_fault_length_nest; - parg.data = &dst->fault_length; - if (ethtool_cable_fault_length_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void ethtool_bitset_bits_free(struct ethtool_bitset_bits *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_bit; i++) - ethtool_bitset_bit_free(&obj->bit[i]); - free(obj->bit); -} - -int ethtool_bitset_bits_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset_bits *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_bit; i++) - ethtool_bitset_bit_put(nlh, ETHTOOL_A_BITSET_BITS_BIT, &obj->bit[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_bits_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset_bits *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_bit = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->bit) - return ynl_error_parse(yarg, "attribute already present (bitset-bits.bit)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_BITS_BIT) { - n_bit++; - } - } - - if (n_bit) { - dst->bit = calloc(n_bit, sizeof(*dst->bit)); - dst->n_bit = n_bit; - i = 0; - parg.rsp_policy = ðtool_bitset_bit_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_BITSET_BITS_BIT) { - parg.data = &dst->bit[i]; - if (ethtool_bitset_bit_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_strings_free(struct ethtool_strings *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_string; i++) - ethtool_string_free(&obj->string[i]); - free(obj->string); -} - -int ethtool_strings_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_strings *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_string; i++) - ethtool_string_put(nlh, ETHTOOL_A_STRINGS_STRING, &obj->string[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_strings_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_strings *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_string = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->string) - return ynl_error_parse(yarg, "attribute already present (strings.string)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGS_STRING) { - n_string++; - } - } - - if (n_string) { - dst->string = calloc(n_string, sizeof(*dst->string)); - dst->n_string = n_string; - i = 0; - parg.rsp_policy = ðtool_string_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGS_STRING) { - parg.data = &dst->string[i]; - if (ethtool_string_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_bitset_free(struct ethtool_bitset *obj) -{ - ethtool_bitset_bits_free(&obj->bits); -} - -int ethtool_bitset_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.nomask) - mnl_attr_put(nlh, ETHTOOL_A_BITSET_NOMASK, 0, NULL); - if (obj->_present.size) - mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_SIZE, obj->size); - if (obj->_present.bits) - ethtool_bitset_bits_put(nlh, ETHTOOL_A_BITSET_BITS, &obj->bits); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_NOMASK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nomask = 1; - } else if (type == ETHTOOL_A_BITSET_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.size = 1; - dst->size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_BITSET_BITS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.bits = 1; - - parg.rsp_policy = ðtool_bitset_bits_nest; - parg.data = &dst->bits; - if (ethtool_bitset_bits_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void ethtool_stringset_free(struct ethtool_stringset_ *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_strings; i++) - ethtool_strings_free(&obj->strings[i]); - free(obj->strings); -} - -int ethtool_stringset_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_stringset_ *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.id) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_ID, obj->id); - if (obj->_present.count) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_COUNT, obj->count); - for (unsigned int i = 0; i < obj->n_strings; i++) - ethtool_strings_put(nlh, ETHTOOL_A_STRINGSET_STRINGS, &obj->strings[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_stringset_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_stringset_ *dst = yarg->data; - unsigned int n_strings = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->strings) - return ynl_error_parse(yarg, "attribute already present (stringset.strings)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGSET_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRINGSET_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.count = 1; - dst->count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRINGSET_STRINGS) { - n_strings++; - } - } - - if (n_strings) { - dst->strings = calloc(n_strings, sizeof(*dst->strings)); - dst->n_strings = n_strings; - i = 0; - parg.rsp_policy = ðtool_strings_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSET_STRINGS) { - parg.data = &dst->strings[i]; - if (ethtool_strings_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_tunnel_udp_table_free(struct ethtool_tunnel_udp_table *obj) -{ - unsigned int i; - - ethtool_bitset_free(&obj->types); - for (i = 0; i < obj->n_entry; i++) - ethtool_tunnel_udp_entry_free(&obj->entry[i]); - free(obj->entry); -} - -int ethtool_tunnel_udp_table_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp_table *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_entry = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->entry) - return ynl_error_parse(yarg, "attribute already present (tunnel-udp-table.entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.size = 1; - dst->size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.types = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->types; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) { - n_entry++; - } - } - - if (n_entry) { - dst->entry = calloc(n_entry, sizeof(*dst->entry)); - dst->n_entry = n_entry; - i = 0; - parg.rsp_policy = ðtool_tunnel_udp_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) { - parg.data = &dst->entry[i]; - if (ethtool_tunnel_udp_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_stringsets_free(struct ethtool_stringsets *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_stringset; i++) - ethtool_stringset_free(&obj->stringset[i]); - free(obj->stringset); -} - -int ethtool_stringsets_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_stringsets *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_stringset; i++) - ethtool_stringset_put(nlh, ETHTOOL_A_STRINGSETS_STRINGSET, &obj->stringset[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_stringsets_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_stringsets *dst = yarg->data; - unsigned int n_stringset = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->stringset) - return ynl_error_parse(yarg, "attribute already present (stringsets.stringset)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGSETS_STRINGSET) { - n_stringset++; - } - } - - if (n_stringset) { - dst->stringset = calloc(n_stringset, sizeof(*dst->stringset)); - dst->n_stringset = n_stringset; - i = 0; - parg.rsp_policy = ðtool_stringset_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSETS_STRINGSET) { - parg.data = &dst->stringset[i]; - if (ethtool_stringset_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_tunnel_udp_free(struct ethtool_tunnel_udp *obj) -{ - ethtool_tunnel_udp_table_free(&obj->table); -} - -int ethtool_tunnel_udp_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_TABLE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.table = 1; - - parg.rsp_policy = ðtool_tunnel_udp_table_nest; - parg.data = &dst->table; - if (ethtool_tunnel_udp_table_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -/* ============== ETHTOOL_MSG_STRSET_GET ============== */ -/* ETHTOOL_MSG_STRSET_GET - do */ -void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req) -{ - ethtool_header_free(&req->header); - ethtool_stringsets_free(&req->stringsets); - free(req); -} - -void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_stringsets_free(&rsp->stringsets); - free(rsp); -} - -int ethtool_strset_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_strset_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRSET_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_STRSET_STRINGSETS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stringsets = 1; - - parg.rsp_policy = ðtool_stringsets_nest; - parg.data = &dst->stringsets; - if (ethtool_stringsets_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_strset_get_rsp * -ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_strset_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1); - ys->req_policy = ðtool_strset_nest; - yrs.yarg.rsp_policy = ðtool_strset_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header); - if (req->_present.stringsets) - ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets); - if (req->_present.counts_only) - mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_strset_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_STRSET_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_strset_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_STRSET_GET - dump */ -void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp) -{ - struct ethtool_strset_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_stringsets_free(&rsp->obj.stringsets); - free(rsp); - } -} - -struct ethtool_strset_get_list * -ethtool_strset_get_dump(struct ynl_sock *ys, - struct ethtool_strset_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_strset_get_list); - yds.cb = ethtool_strset_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_STRSET_GET; - yds.rsp_policy = ðtool_strset_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1); - ys->req_policy = ðtool_strset_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header); - if (req->_present.stringsets) - ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets); - if (req->_present.counts_only) - mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_strset_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */ -/* ETHTOOL_MSG_LINKINFO_GET - do */ -void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_linkinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkinfo_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKINFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKINFO_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_PHYADDR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.phyaddr = 1; - dst->phyaddr = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tp_mdix = 1; - dst->tp_mdix = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX_CTRL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tp_mdix_ctrl = 1; - dst->tp_mdix_ctrl = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TRANSCEIVER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.transceiver = 1; - dst->transceiver = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkinfo_get_rsp * -ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkinfo_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_GET, 1); - ys->req_policy = ðtool_linkinfo_nest; - yrs.yarg.rsp_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkinfo_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkinfo_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKINFO_GET - dump */ -void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp) -{ - struct ethtool_linkinfo_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_linkinfo_get_list * -ethtool_linkinfo_get_dump(struct ynl_sock *ys, - struct ethtool_linkinfo_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkinfo_get_list); - yds.cb = ethtool_linkinfo_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET; - yds.rsp_policy = ðtool_linkinfo_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_GET, 1); - ys->req_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkinfo_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_LINKINFO_GET - notify */ -void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */ -/* ETHTOOL_MSG_LINKINFO_SET - do */ -void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_linkinfo_set(struct ynl_sock *ys, - struct ethtool_linkinfo_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_SET, 1); - ys->req_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - if (req->_present.port) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PORT, req->port); - if (req->_present.phyaddr) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PHYADDR, req->phyaddr); - if (req->_present.tp_mdix) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX, req->tp_mdix); - if (req->_present.tp_mdix_ctrl) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, req->tp_mdix_ctrl); - if (req->_present.transceiver) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */ -/* ETHTOOL_MSG_LINKMODES_GET - do */ -void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->ours); - ethtool_bitset_free(&rsp->peer); - free(rsp); -} - -int ethtool_linkmodes_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkmodes_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKMODES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_AUTONEG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.autoneg = 1; - dst->autoneg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_OURS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ours = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->ours; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_PEER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->peer; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_SPEED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.speed = 1; - dst->speed = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKMODES_DUPLEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.duplex = 1; - dst->duplex = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.master_slave_cfg = 1; - dst->master_slave_cfg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.master_slave_state = 1; - dst->master_slave_state = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_LANES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.lanes = 1; - dst->lanes = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKMODES_RATE_MATCHING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rate_matching = 1; - dst->rate_matching = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkmodes_get_rsp * -ethtool_linkmodes_get(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkmodes_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1); - ys->req_policy = ðtool_linkmodes_nest; - yrs.yarg.rsp_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkmodes_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkmodes_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKMODES_GET - dump */ -void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp) -{ - struct ethtool_linkmodes_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.ours); - ethtool_bitset_free(&rsp->obj.peer); - free(rsp); - } -} - -struct ethtool_linkmodes_get_list * -ethtool_linkmodes_get_dump(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkmodes_get_list); - yds.cb = ethtool_linkmodes_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET; - yds.rsp_policy = ðtool_linkmodes_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1); - ys->req_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkmodes_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_LINKMODES_GET - notify */ -void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.ours); - ethtool_bitset_free(&rsp->obj.peer); - free(rsp); -} - -/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */ -/* ETHTOOL_MSG_LINKMODES_SET - do */ -void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->ours); - ethtool_bitset_free(&req->peer); - free(req); -} - -int ethtool_linkmodes_set(struct ynl_sock *ys, - struct ethtool_linkmodes_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_SET, 1); - ys->req_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - if (req->_present.autoneg) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_AUTONEG, req->autoneg); - if (req->_present.ours) - ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_OURS, &req->ours); - if (req->_present.peer) - ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_PEER, &req->peer); - if (req->_present.speed) - mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_SPEED, req->speed); - if (req->_present.duplex) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_DUPLEX, req->duplex); - if (req->_present.master_slave_cfg) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, req->master_slave_cfg); - if (req->_present.master_slave_state) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, req->master_slave_state); - if (req->_present.lanes) - mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_LANES, req->lanes); - if (req->_present.rate_matching) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */ -/* ETHTOOL_MSG_LINKSTATE_GET - do */ -void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_linkstate_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkstate_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKSTATE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKSTATE_LINK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.link = 1; - dst->link = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_SQI) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sqi = 1; - dst->sqi = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKSTATE_SQI_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sqi_max = 1; - dst->sqi_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_state = 1; - dst->ext_state = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_SUBSTATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_substate = 1; - dst->ext_substate = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_down_cnt = 1; - dst->ext_down_cnt = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkstate_get_rsp * -ethtool_linkstate_get(struct ynl_sock *ys, - struct ethtool_linkstate_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkstate_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1); - ys->req_policy = ðtool_linkstate_nest; - yrs.yarg.rsp_policy = ðtool_linkstate_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkstate_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkstate_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKSTATE_GET - dump */ -void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp) -{ - struct ethtool_linkstate_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_linkstate_get_list * -ethtool_linkstate_get_dump(struct ynl_sock *ys, - struct ethtool_linkstate_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkstate_get_list); - yds.cb = ethtool_linkstate_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET; - yds.rsp_policy = ðtool_linkstate_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1); - ys->req_policy = ðtool_linkstate_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkstate_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_DEBUG_GET ============== */ -/* ETHTOOL_MSG_DEBUG_GET - do */ -void ethtool_debug_get_req_free(struct ethtool_debug_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->msgmask); - free(rsp); -} - -int ethtool_debug_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_debug_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_DEBUG_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_DEBUG_MSGMASK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.msgmask = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->msgmask; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_debug_get_rsp * -ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_debug_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1); - ys->req_policy = ðtool_debug_nest; - yrs.yarg.rsp_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_debug_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_DEBUG_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_debug_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_DEBUG_GET - dump */ -void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp) -{ - struct ethtool_debug_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.msgmask); - free(rsp); - } -} - -struct ethtool_debug_get_list * -ethtool_debug_get_dump(struct ynl_sock *ys, - struct ethtool_debug_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_debug_get_list); - yds.cb = ethtool_debug_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_DEBUG_GET; - yds.rsp_policy = ðtool_debug_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1); - ys->req_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_debug_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_DEBUG_GET - notify */ -void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.msgmask); - free(rsp); -} - -/* ============== ETHTOOL_MSG_DEBUG_SET ============== */ -/* ETHTOOL_MSG_DEBUG_SET - do */ -void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->msgmask); - free(req); -} - -int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_SET, 1); - ys->req_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - if (req->_present.msgmask) - ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_WOL_GET ============== */ -/* ETHTOOL_MSG_WOL_GET - do */ -void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes); - free(rsp->sopass); - free(rsp); -} - -int ethtool_wol_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_wol_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_WOL_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_WOL_MODES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_WOL_SOPASS) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.sopass_len = len; - dst->sopass = malloc(len); - memcpy(dst->sopass, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_wol_get_rsp * -ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_wol_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1); - ys->req_policy = ðtool_wol_nest; - yrs.yarg.rsp_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_wol_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_WOL_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_wol_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_WOL_GET - dump */ -void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp) -{ - struct ethtool_wol_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - free(rsp->obj.sopass); - free(rsp); - } -} - -struct ethtool_wol_get_list * -ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_wol_get_list); - yds.cb = ethtool_wol_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_WOL_GET; - yds.rsp_policy = ðtool_wol_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1); - ys->req_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_wol_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_WOL_GET - notify */ -void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - free(rsp->obj.sopass); - free(rsp); -} - -/* ============== ETHTOOL_MSG_WOL_SET ============== */ -/* ETHTOOL_MSG_WOL_SET - do */ -void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes); - free(req->sopass); - free(req); -} - -int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_SET, 1); - ys->req_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - if (req->_present.modes) - ethtool_bitset_put(nlh, ETHTOOL_A_WOL_MODES, &req->modes); - if (req->_present.sopass_len) - mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_FEATURES_GET ============== */ -/* ETHTOOL_MSG_FEATURES_GET - do */ -void ethtool_features_get_req_free(struct ethtool_features_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->hw); - ethtool_bitset_free(&rsp->wanted); - ethtool_bitset_free(&rsp->active); - ethtool_bitset_free(&rsp->nochange); - free(rsp); -} - -int ethtool_features_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_features_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEATURES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_HW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hw = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->hw; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_WANTED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.wanted = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->wanted; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->active; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_NOCHANGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nochange = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->nochange; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_features_get_rsp * -ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_features_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1); - ys->req_policy = ðtool_features_nest; - yrs.yarg.rsp_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_features_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_features_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_FEATURES_GET - dump */ -void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp) -{ - struct ethtool_features_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.hw); - ethtool_bitset_free(&rsp->obj.wanted); - ethtool_bitset_free(&rsp->obj.active); - ethtool_bitset_free(&rsp->obj.nochange); - free(rsp); - } -} - -struct ethtool_features_get_list * -ethtool_features_get_dump(struct ynl_sock *ys, - struct ethtool_features_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_features_get_list); - yds.cb = ethtool_features_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_FEATURES_GET; - yds.rsp_policy = ðtool_features_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1); - ys->req_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_features_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_FEATURES_GET - notify */ -void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.hw); - ethtool_bitset_free(&rsp->obj.wanted); - ethtool_bitset_free(&rsp->obj.active); - ethtool_bitset_free(&rsp->obj.nochange); - free(rsp); -} - -/* ============== ETHTOOL_MSG_FEATURES_SET ============== */ -/* ETHTOOL_MSG_FEATURES_SET - do */ -void ethtool_features_set_req_free(struct ethtool_features_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->hw); - ethtool_bitset_free(&req->wanted); - ethtool_bitset_free(&req->active); - ethtool_bitset_free(&req->nochange); - free(req); -} - -void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->hw); - ethtool_bitset_free(&rsp->wanted); - ethtool_bitset_free(&rsp->active); - ethtool_bitset_free(&rsp->nochange); - free(rsp); -} - -int ethtool_features_set_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_features_set_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEATURES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_HW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hw = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->hw; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_WANTED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.wanted = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->wanted; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->active; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_NOCHANGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nochange = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->nochange; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_features_set_rsp * -ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_features_set_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_SET, 1); - ys->req_policy = ðtool_features_nest; - yrs.yarg.rsp_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - if (req->_present.hw) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_HW, &req->hw); - if (req->_present.wanted) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_WANTED, &req->wanted); - if (req->_present.active) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_ACTIVE, &req->active); - if (req->_present.nochange) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_NOCHANGE, &req->nochange); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_features_set_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_SET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_features_set_rsp_free(rsp); - return NULL; -} - -/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_GET - do */ -void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->flags); - free(rsp); -} - -int ethtool_privflags_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_privflags_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PRIVFLAGS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PRIVFLAGS_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->flags; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_privflags_get_rsp * -ethtool_privflags_get(struct ynl_sock *ys, - struct ethtool_privflags_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_privflags_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1); - ys->req_policy = ðtool_privflags_nest; - yrs.yarg.rsp_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_privflags_get_rsp_parse; - yrs.rsp_cmd = 14; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_privflags_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */ -void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp) -{ - struct ethtool_privflags_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.flags); - free(rsp); - } -} - -struct ethtool_privflags_get_list * -ethtool_privflags_get_dump(struct ynl_sock *ys, - struct ethtool_privflags_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_privflags_get_list); - yds.cb = ethtool_privflags_get_rsp_parse; - yds.rsp_cmd = 14; - yds.rsp_policy = ðtool_privflags_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1); - ys->req_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_privflags_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */ -void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.flags); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_SET - do */ -void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->flags); - free(req); -} - -int ethtool_privflags_set(struct ynl_sock *ys, - struct ethtool_privflags_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_SET, 1); - ys->req_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - if (req->_present.flags) - ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_RINGS_GET ============== */ -/* ETHTOOL_MSG_RINGS_GET - do */ -void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_rings_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_rings_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_RINGS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_RINGS_RX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max = 1; - dst->rx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_MINI_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_mini_max = 1; - dst->rx_mini_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_JUMBO_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_jumbo_max = 1; - dst->rx_jumbo_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max = 1; - dst->tx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx = 1; - dst->rx = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_MINI) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_mini = 1; - dst->rx_mini = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_JUMBO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_jumbo = 1; - dst->rx_jumbo = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx = 1; - dst->tx = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_BUF_LEN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_buf_len = 1; - dst->rx_buf_len = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TCP_DATA_SPLIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tcp_data_split = 1; - dst->tcp_data_split = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_CQE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cqe_size = 1; - dst->cqe_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX_PUSH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push = 1; - dst->tx_push = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_RX_PUSH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_push = 1; - dst->rx_push = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push_buf_len = 1; - dst->tx_push_buf_len = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push_buf_len_max = 1; - dst->tx_push_buf_len_max = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_rings_get_rsp * -ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_rings_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1); - ys->req_policy = ðtool_rings_nest; - yrs.yarg.rsp_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_rings_get_rsp_parse; - yrs.rsp_cmd = 16; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_rings_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_RINGS_GET - dump */ -void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp) -{ - struct ethtool_rings_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_rings_get_list * -ethtool_rings_get_dump(struct ynl_sock *ys, - struct ethtool_rings_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_rings_get_list); - yds.cb = ethtool_rings_get_rsp_parse; - yds.rsp_cmd = 16; - yds.rsp_policy = ðtool_rings_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1); - ys->req_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_rings_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_RINGS_GET - notify */ -void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_RINGS_SET ============== */ -/* ETHTOOL_MSG_RINGS_SET - do */ -void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_SET, 1); - ys->req_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - if (req->_present.rx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MAX, req->rx_max); - if (req->_present.rx_mini_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI_MAX, req->rx_mini_max); - if (req->_present.rx_jumbo_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO_MAX, req->rx_jumbo_max); - if (req->_present.tx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_MAX, req->tx_max); - if (req->_present.rx) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX, req->rx); - if (req->_present.rx_mini) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI, req->rx_mini); - if (req->_present.rx_jumbo) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO, req->rx_jumbo); - if (req->_present.tx) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX, req->tx); - if (req->_present.rx_buf_len) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_BUF_LEN, req->rx_buf_len); - if (req->_present.tcp_data_split) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, req->tcp_data_split); - if (req->_present.cqe_size) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_CQE_SIZE, req->cqe_size); - if (req->_present.tx_push) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TX_PUSH, req->tx_push); - if (req->_present.rx_push) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_RX_PUSH, req->rx_push); - if (req->_present.tx_push_buf_len) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, req->tx_push_buf_len); - if (req->_present.tx_push_buf_len_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */ -/* ETHTOOL_MSG_CHANNELS_GET - do */ -void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_channels_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_channels_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CHANNELS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CHANNELS_RX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max = 1; - dst->rx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_TX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max = 1; - dst->tx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_OTHER_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.other_max = 1; - dst->other_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_COMBINED_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.combined_max = 1; - dst->combined_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_RX_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_count = 1; - dst->rx_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_TX_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_count = 1; - dst->tx_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_OTHER_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.other_count = 1; - dst->other_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_COMBINED_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.combined_count = 1; - dst->combined_count = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_channels_get_rsp * -ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_channels_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1); - ys->req_policy = ðtool_channels_nest; - yrs.yarg.rsp_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_channels_get_rsp_parse; - yrs.rsp_cmd = 18; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_channels_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_CHANNELS_GET - dump */ -void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp) -{ - struct ethtool_channels_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_channels_get_list * -ethtool_channels_get_dump(struct ynl_sock *ys, - struct ethtool_channels_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_channels_get_list); - yds.cb = ethtool_channels_get_rsp_parse; - yds.rsp_cmd = 18; - yds.rsp_policy = ðtool_channels_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1); - ys->req_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_channels_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_CHANNELS_GET - notify */ -void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */ -/* ETHTOOL_MSG_CHANNELS_SET - do */ -void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_channels_set(struct ynl_sock *ys, - struct ethtool_channels_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_SET, 1); - ys->req_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - if (req->_present.rx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_MAX, req->rx_max); - if (req->_present.tx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_MAX, req->tx_max); - if (req->_present.other_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_MAX, req->other_max); - if (req->_present.combined_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_MAX, req->combined_max); - if (req->_present.rx_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_COUNT, req->rx_count); - if (req->_present.tx_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_COUNT, req->tx_count); - if (req->_present.other_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_COUNT, req->other_count); - if (req->_present.combined_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_COALESCE_GET ============== */ -/* ETHTOOL_MSG_COALESCE_GET - do */ -void ethtool_coalesce_get_req_free(struct ethtool_coalesce_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_coalesce_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_coalesce_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_COALESCE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_COALESCE_RX_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs = 1; - dst->rx_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames = 1; - dst->rx_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_irq = 1; - dst->rx_usecs_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames_irq = 1; - dst->rx_max_frames_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs = 1; - dst->tx_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames = 1; - dst->tx_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_irq = 1; - dst->tx_usecs_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_irq = 1; - dst->tx_max_frames_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_STATS_BLOCK_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats_block_usecs = 1; - dst->stats_block_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_adaptive_rx = 1; - dst->use_adaptive_rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_adaptive_tx = 1; - dst->use_adaptive_tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pkt_rate_low = 1; - dst->pkt_rate_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_low = 1; - dst->rx_usecs_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames_low = 1; - dst->rx_max_frames_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_low = 1; - dst->tx_usecs_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_low = 1; - dst->tx_max_frames_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pkt_rate_high = 1; - dst->pkt_rate_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_high = 1; - dst->rx_usecs_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames_high = 1; - dst->rx_max_frames_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_high = 1; - dst->tx_usecs_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_high = 1; - dst->tx_max_frames_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rate_sample_interval = 1; - dst->rate_sample_interval = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_cqe_mode_tx = 1; - dst->use_cqe_mode_tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_cqe_mode_rx = 1; - dst->use_cqe_mode_rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_max_bytes = 1; - dst->tx_aggr_max_bytes = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_max_frames = 1; - dst->tx_aggr_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_time_usecs = 1; - dst->tx_aggr_time_usecs = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_coalesce_get_rsp * -ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_coalesce_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1); - ys->req_policy = ðtool_coalesce_nest; - yrs.yarg.rsp_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_coalesce_get_rsp_parse; - yrs.rsp_cmd = 20; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_coalesce_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_COALESCE_GET - dump */ -void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp) -{ - struct ethtool_coalesce_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_coalesce_get_list * -ethtool_coalesce_get_dump(struct ynl_sock *ys, - struct ethtool_coalesce_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_coalesce_get_list); - yds.cb = ethtool_coalesce_get_rsp_parse; - yds.rsp_cmd = 20; - yds.rsp_policy = ðtool_coalesce_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1); - ys->req_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_coalesce_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_COALESCE_GET - notify */ -void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_COALESCE_SET ============== */ -/* ETHTOOL_MSG_COALESCE_SET - do */ -void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_coalesce_set(struct ynl_sock *ys, - struct ethtool_coalesce_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_SET, 1); - ys->req_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - if (req->_present.rx_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS, req->rx_usecs); - if (req->_present.rx_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES, req->rx_max_frames); - if (req->_present.rx_usecs_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_IRQ, req->rx_usecs_irq); - if (req->_present.rx_max_frames_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, req->rx_max_frames_irq); - if (req->_present.tx_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS, req->tx_usecs); - if (req->_present.tx_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES, req->tx_max_frames); - if (req->_present.tx_usecs_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_IRQ, req->tx_usecs_irq); - if (req->_present.tx_max_frames_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, req->tx_max_frames_irq); - if (req->_present.stats_block_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, req->stats_block_usecs); - if (req->_present.use_adaptive_rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, req->use_adaptive_rx); - if (req->_present.use_adaptive_tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, req->use_adaptive_tx); - if (req->_present.pkt_rate_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_LOW, req->pkt_rate_low); - if (req->_present.rx_usecs_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_LOW, req->rx_usecs_low); - if (req->_present.rx_max_frames_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, req->rx_max_frames_low); - if (req->_present.tx_usecs_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_LOW, req->tx_usecs_low); - if (req->_present.tx_max_frames_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, req->tx_max_frames_low); - if (req->_present.pkt_rate_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_HIGH, req->pkt_rate_high); - if (req->_present.rx_usecs_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_HIGH, req->rx_usecs_high); - if (req->_present.rx_max_frames_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, req->rx_max_frames_high); - if (req->_present.tx_usecs_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_HIGH, req->tx_usecs_high); - if (req->_present.tx_max_frames_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, req->tx_max_frames_high); - if (req->_present.rate_sample_interval) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, req->rate_sample_interval); - if (req->_present.use_cqe_mode_tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, req->use_cqe_mode_tx); - if (req->_present.use_cqe_mode_rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, req->use_cqe_mode_rx); - if (req->_present.tx_aggr_max_bytes) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, req->tx_aggr_max_bytes); - if (req->_present.tx_aggr_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, req->tx_aggr_max_frames); - if (req->_present.tx_aggr_time_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PAUSE_GET ============== */ -/* ETHTOOL_MSG_PAUSE_GET - do */ -void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_pause_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_pause_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_pause_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PAUSE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PAUSE_AUTONEG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.autoneg = 1; - dst->autoneg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx = 1; - dst->rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx = 1; - dst->tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_pause_stat_nest; - parg.data = &dst->stats; - if (ethtool_pause_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PAUSE_STATS_SRC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats_src = 1; - dst->stats_src = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_pause_get_rsp * -ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_pause_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1); - ys->req_policy = ðtool_pause_nest; - yrs.yarg.rsp_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_pause_get_rsp_parse; - yrs.rsp_cmd = 22; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_pause_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PAUSE_GET - dump */ -void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp) -{ - struct ethtool_pause_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_pause_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_pause_get_list * -ethtool_pause_get_dump(struct ynl_sock *ys, - struct ethtool_pause_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_pause_get_list); - yds.cb = ethtool_pause_get_rsp_parse; - yds.rsp_cmd = 22; - yds.rsp_policy = ðtool_pause_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1); - ys->req_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_pause_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PAUSE_GET - notify */ -void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_pause_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PAUSE_SET ============== */ -/* ETHTOOL_MSG_PAUSE_SET - do */ -void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_pause_stat_free(&req->stats); - free(req); -} - -int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_SET, 1); - ys->req_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - if (req->_present.autoneg) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_AUTONEG, req->autoneg); - if (req->_present.rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_RX, req->rx); - if (req->_present.tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_TX, req->tx); - if (req->_present.stats) - ethtool_pause_stat_put(nlh, ETHTOOL_A_PAUSE_STATS, &req->stats); - if (req->_present.stats_src) - mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_EEE_GET ============== */ -/* ETHTOOL_MSG_EEE_GET - do */ -void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes_ours); - ethtool_bitset_free(&rsp->modes_peer); - free(rsp); -} - -int ethtool_eee_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_eee_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_EEE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_MODES_OURS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes_ours = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes_ours; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_MODES_PEER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes_peer = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes_peer; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - dst->active = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_TX_LPI_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_lpi_enabled = 1; - dst->tx_lpi_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_TX_LPI_TIMER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_lpi_timer = 1; - dst->tx_lpi_timer = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_eee_get_rsp * -ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_eee_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1); - ys->req_policy = ðtool_eee_nest; - yrs.yarg.rsp_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_eee_get_rsp_parse; - yrs.rsp_cmd = 24; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_eee_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_EEE_GET - dump */ -void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp) -{ - struct ethtool_eee_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes_ours); - ethtool_bitset_free(&rsp->obj.modes_peer); - free(rsp); - } -} - -struct ethtool_eee_get_list * -ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_eee_get_list); - yds.cb = ethtool_eee_get_rsp_parse; - yds.rsp_cmd = 24; - yds.rsp_policy = ðtool_eee_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1); - ys->req_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_eee_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_EEE_GET - notify */ -void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes_ours); - ethtool_bitset_free(&rsp->obj.modes_peer); - free(rsp); -} - -/* ============== ETHTOOL_MSG_EEE_SET ============== */ -/* ETHTOOL_MSG_EEE_SET - do */ -void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes_ours); - ethtool_bitset_free(&req->modes_peer); - free(req); -} - -int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_SET, 1); - ys->req_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - if (req->_present.modes_ours) - ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_OURS, &req->modes_ours); - if (req->_present.modes_peer) - ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_PEER, &req->modes_peer); - if (req->_present.active) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ACTIVE, req->active); - if (req->_present.enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ENABLED, req->enabled); - if (req->_present.tx_lpi_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_TX_LPI_ENABLED, req->tx_lpi_enabled); - if (req->_present.tx_lpi_timer) - mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_TSINFO_GET ============== */ -/* ETHTOOL_MSG_TSINFO_GET - do */ -void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->timestamping); - ethtool_bitset_free(&rsp->tx_types); - ethtool_bitset_free(&rsp->rx_filters); - free(rsp); -} - -int ethtool_tsinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_tsinfo_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TSINFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_TIMESTAMPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.timestamping = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->timestamping; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_TX_TYPES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_types = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->tx_types; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_RX_FILTERS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_filters = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->rx_filters; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_PHC_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.phc_index = 1; - dst->phc_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_tsinfo_get_rsp * -ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_tsinfo_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1); - ys->req_policy = ðtool_tsinfo_nest; - yrs.yarg.rsp_policy = ðtool_tsinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_tsinfo_get_rsp_parse; - yrs.rsp_cmd = 26; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_tsinfo_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_TSINFO_GET - dump */ -void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp) -{ - struct ethtool_tsinfo_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.timestamping); - ethtool_bitset_free(&rsp->obj.tx_types); - ethtool_bitset_free(&rsp->obj.rx_filters); - free(rsp); - } -} - -struct ethtool_tsinfo_get_list * -ethtool_tsinfo_get_dump(struct ynl_sock *ys, - struct ethtool_tsinfo_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_tsinfo_get_list); - yds.cb = ethtool_tsinfo_get_rsp_parse; - yds.rsp_cmd = 26; - yds.rsp_policy = ðtool_tsinfo_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1); - ys->req_policy = ðtool_tsinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_tsinfo_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_ACT - do */ -void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_cable_test_act(struct ynl_sock *ys, - struct ethtool_cable_test_act_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_ACT, 1); - ys->req_policy = ðtool_cable_test_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */ -void -ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_cable_test_tdr_act(struct ynl_sock *ys, - struct ethtool_cable_test_tdr_act_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, 1); - ys->req_policy = ðtool_cable_test_tdr_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */ -/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */ -void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_tunnel_udp_free(&rsp->udp_ports); - free(rsp); -} - -int ethtool_tunnel_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_tunnel_info_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_INFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TUNNEL_INFO_UDP_PORTS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.udp_ports = 1; - - parg.rsp_policy = ðtool_tunnel_udp_nest; - parg.data = &dst->udp_ports; - if (ethtool_tunnel_udp_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_tunnel_info_get_rsp * -ethtool_tunnel_info_get(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_tunnel_info_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1); - ys->req_policy = ðtool_tunnel_info_nest; - yrs.yarg.rsp_policy = ðtool_tunnel_info_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_tunnel_info_get_rsp_parse; - yrs.rsp_cmd = 29; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_tunnel_info_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */ -void -ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp) -{ - struct ethtool_tunnel_info_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_tunnel_udp_free(&rsp->obj.udp_ports); - free(rsp); - } -} - -struct ethtool_tunnel_info_get_list * -ethtool_tunnel_info_get_dump(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_tunnel_info_get_list); - yds.cb = ethtool_tunnel_info_get_rsp_parse; - yds.rsp_cmd = 29; - yds.rsp_policy = ðtool_tunnel_info_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1); - ys->req_policy = ðtool_tunnel_info_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_tunnel_info_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_FEC_GET ============== */ -/* ETHTOOL_MSG_FEC_GET - do */ -void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes); - ethtool_fec_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_fec_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_fec_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEC_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEC_MODES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEC_AUTO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.auto_ = 1; - dst->auto_ = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_FEC_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - dst->active = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_FEC_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_fec_stat_nest; - parg.data = &dst->stats; - if (ethtool_fec_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_fec_get_rsp * -ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_fec_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1); - ys->req_policy = ðtool_fec_nest; - yrs.yarg.rsp_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_fec_get_rsp_parse; - yrs.rsp_cmd = 30; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_fec_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_FEC_GET - dump */ -void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp) -{ - struct ethtool_fec_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - ethtool_fec_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_fec_get_list * -ethtool_fec_get_dump(struct ynl_sock *ys, struct ethtool_fec_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_fec_get_list); - yds.cb = ethtool_fec_get_rsp_parse; - yds.rsp_cmd = 30; - yds.rsp_policy = ðtool_fec_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1); - ys->req_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_fec_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_FEC_GET - notify */ -void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - ethtool_fec_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_FEC_SET ============== */ -/* ETHTOOL_MSG_FEC_SET - do */ -void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes); - ethtool_fec_stat_free(&req->stats); - free(req); -} - -int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_SET, 1); - ys->req_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - if (req->_present.modes) - ethtool_bitset_put(nlh, ETHTOOL_A_FEC_MODES, &req->modes); - if (req->_present.auto_) - mnl_attr_put_u8(nlh, ETHTOOL_A_FEC_AUTO, req->auto_); - if (req->_present.active) - mnl_attr_put_u32(nlh, ETHTOOL_A_FEC_ACTIVE, req->active); - if (req->_present.stats) - ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */ -/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */ -void -ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void -ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp->data); - free(rsp); -} - -int ethtool_module_eeprom_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_module_eeprom_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MODULE_EEPROM_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MODULE_EEPROM_OFFSET) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.offset = 1; - dst->offset = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_LENGTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.length = 1; - dst->length = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_PAGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.page = 1; - dst->page = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_BANK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.bank = 1; - dst->bank = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.i2c_address = 1; - dst->i2c_address = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_DATA) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.data_len = len; - dst->data = malloc(len); - memcpy(dst->data, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_module_eeprom_get_rsp * -ethtool_module_eeprom_get(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_module_eeprom_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1); - ys->req_policy = ðtool_module_eeprom_nest; - yrs.yarg.rsp_policy = ðtool_module_eeprom_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_module_eeprom_get_rsp_parse; - yrs.rsp_cmd = 32; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_module_eeprom_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */ -void -ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp) -{ - struct ethtool_module_eeprom_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp->obj.data); - free(rsp); - } -} - -struct ethtool_module_eeprom_get_list * -ethtool_module_eeprom_get_dump(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_module_eeprom_get_list); - yds.cb = ethtool_module_eeprom_get_rsp_parse; - yds.rsp_cmd = 32; - yds.rsp_policy = ðtool_module_eeprom_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1); - ys->req_policy = ðtool_module_eeprom_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_module_eeprom_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */ -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */ -void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_phc_vclocks_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_phc_vclocks_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PHC_VCLOCKS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PHC_VCLOCKS_NUM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.num = 1; - dst->num = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_phc_vclocks_get_rsp * -ethtool_phc_vclocks_get(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_phc_vclocks_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1); - ys->req_policy = ðtool_phc_vclocks_nest; - yrs.yarg.rsp_policy = ðtool_phc_vclocks_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_phc_vclocks_get_rsp_parse; - yrs.rsp_cmd = 34; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_phc_vclocks_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */ -void -ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp) -{ - struct ethtool_phc_vclocks_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_phc_vclocks_get_list * -ethtool_phc_vclocks_get_dump(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_phc_vclocks_get_list); - yds.cb = ethtool_phc_vclocks_get_rsp_parse; - yds.rsp_cmd = 34; - yds.rsp_policy = ðtool_phc_vclocks_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1); - ys->req_policy = ðtool_phc_vclocks_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_phc_vclocks_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_MODULE_GET ============== */ -/* ETHTOOL_MSG_MODULE_GET - do */ -void ethtool_module_get_req_free(struct ethtool_module_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_module_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_module_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MODULE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MODULE_POWER_MODE_POLICY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.power_mode_policy = 1; - dst->power_mode_policy = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_POWER_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.power_mode = 1; - dst->power_mode = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_module_get_rsp * -ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_module_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1); - ys->req_policy = ðtool_module_nest; - yrs.yarg.rsp_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_module_get_rsp_parse; - yrs.rsp_cmd = 35; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_module_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_GET - dump */ -void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp) -{ - struct ethtool_module_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_module_get_list * -ethtool_module_get_dump(struct ynl_sock *ys, - struct ethtool_module_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_module_get_list); - yds.cb = ethtool_module_get_rsp_parse; - yds.rsp_cmd = 35; - yds.rsp_policy = ðtool_module_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1); - ys->req_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_module_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_GET - notify */ -void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_MODULE_SET ============== */ -/* ETHTOOL_MSG_MODULE_SET - do */ -void ethtool_module_set_req_free(struct ethtool_module_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_SET, 1); - ys->req_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - if (req->_present.power_mode_policy) - mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE_POLICY, req->power_mode_policy); - if (req->_present.power_mode) - mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PSE_GET ============== */ -/* ETHTOOL_MSG_PSE_GET - do */ -void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_pse_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_pse_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PSE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.admin_state = 1; - dst->admin_state = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_CONTROL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.admin_control = 1; - dst->admin_control = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PODL_PSE_PW_D_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pw_d_status = 1; - dst->pw_d_status = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_pse_get_rsp * -ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_pse_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1); - ys->req_policy = ðtool_pse_nest; - yrs.yarg.rsp_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_pse_get_rsp_parse; - yrs.rsp_cmd = 37; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_pse_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PSE_GET - dump */ -void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp) -{ - struct ethtool_pse_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_pse_get_list * -ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_pse_get_list); - yds.cb = ethtool_pse_get_rsp_parse; - yds.rsp_cmd = 37; - yds.rsp_policy = ðtool_pse_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1); - ys->req_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_pse_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PSE_SET ============== */ -/* ETHTOOL_MSG_PSE_SET - do */ -void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_SET, 1); - ys->req_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - if (req->_present.admin_state) - mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_ADMIN_STATE, req->admin_state); - if (req->_present.admin_control) - mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, req->admin_control); - if (req->_present.pw_d_status) - mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_RSS_GET ============== */ -/* ETHTOOL_MSG_RSS_GET - do */ -void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp->indir); - free(rsp->hkey); - free(rsp); -} - -int ethtool_rss_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_rss_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_RSS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_RSS_CONTEXT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.context = 1; - dst->context = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RSS_HFUNC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hfunc = 1; - dst->hfunc = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RSS_INDIR) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.indir_len = len; - dst->indir = malloc(len); - memcpy(dst->indir, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_RSS_HKEY) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.hkey_len = len; - dst->hkey = malloc(len); - memcpy(dst->hkey, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_rss_get_rsp * -ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_rss_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1); - ys->req_policy = ðtool_rss_nest; - yrs.yarg.rsp_policy = ðtool_rss_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_rss_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_RSS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_rss_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_RSS_GET - dump */ -void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp) -{ - struct ethtool_rss_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp->obj.indir); - free(rsp->obj.hkey); - free(rsp); - } -} - -struct ethtool_rss_get_list * -ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_rss_get_list); - yds.cb = ethtool_rss_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_RSS_GET; - yds.rsp_policy = ðtool_rss_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1); - ys->req_policy = ðtool_rss_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_rss_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_GET_CFG - do */ -void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_plca_get_cfg_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_plca_get_cfg_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PLCA_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PLCA_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_PLCA_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_cnt = 1; - dst->node_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_id = 1; - dst->node_id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_TO_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.to_tmr = 1; - dst->to_tmr = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_cnt = 1; - dst->burst_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_tmr = 1; - dst->burst_tmr = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_plca_get_cfg_rsp * -ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_plca_get_cfg_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - yrs.yarg.rsp_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_plca_get_cfg_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_plca_get_cfg_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_CFG - dump */ -void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp) -{ - struct ethtool_plca_get_cfg_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_plca_get_cfg_list * -ethtool_plca_get_cfg_dump(struct ynl_sock *ys, - struct ethtool_plca_get_cfg_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_plca_get_cfg_list); - yds.cb = ethtool_plca_get_cfg_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG; - yds.rsp_policy = ðtool_plca_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_plca_get_cfg_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_CFG - notify */ -void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_SET_CFG - do */ -void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_plca_set_cfg(struct ynl_sock *ys, - struct ethtool_plca_set_cfg_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_SET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - if (req->_present.version) - mnl_attr_put_u16(nlh, ETHTOOL_A_PLCA_VERSION, req->version); - if (req->_present.enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_ENABLED, req->enabled); - if (req->_present.status) - mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_STATUS, req->status); - if (req->_present.node_cnt) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_CNT, req->node_cnt); - if (req->_present.node_id) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_ID, req->node_id); - if (req->_present.to_tmr) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_TO_TMR, req->to_tmr); - if (req->_present.burst_cnt) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_CNT, req->burst_cnt); - if (req->_present.burst_tmr) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */ -/* ETHTOOL_MSG_PLCA_GET_STATUS - do */ -void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_plca_get_status_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_plca_get_status_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PLCA_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PLCA_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_PLCA_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_cnt = 1; - dst->node_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_id = 1; - dst->node_id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_TO_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.to_tmr = 1; - dst->to_tmr = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_cnt = 1; - dst->burst_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_tmr = 1; - dst->burst_tmr = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_plca_get_status_rsp * -ethtool_plca_get_status(struct ynl_sock *ys, - struct ethtool_plca_get_status_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_plca_get_status_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1); - ys->req_policy = ðtool_plca_nest; - yrs.yarg.rsp_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_plca_get_status_rsp_parse; - yrs.rsp_cmd = 40; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_plca_get_status_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */ -void -ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp) -{ - struct ethtool_plca_get_status_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_plca_get_status_list * -ethtool_plca_get_status_dump(struct ynl_sock *ys, - struct ethtool_plca_get_status_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_plca_get_status_list); - yds.cb = ethtool_plca_get_status_rsp_parse; - yds.rsp_cmd = 40; - yds.rsp_policy = ðtool_plca_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_plca_get_status_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_MM_GET ============== */ -/* ETHTOOL_MSG_MM_GET - do */ -void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_mm_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_mm_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_mm_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MM_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MM_PMAC_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pmac_enabled = 1; - dst->pmac_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_enabled = 1; - dst->tx_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_active = 1; - dst->tx_active = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_MIN_FRAG_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_min_frag_size = 1; - dst->tx_min_frag_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_RX_MIN_FRAG_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_min_frag_size = 1; - dst->rx_min_frag_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_VERIFY_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.verify_enabled = 1; - dst->verify_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_VERIFY_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.verify_time = 1; - dst->verify_time = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_MAX_VERIFY_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.max_verify_time = 1; - dst->max_verify_time = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_mm_stat_nest; - parg.data = &dst->stats; - if (ethtool_mm_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_mm_get_rsp * -ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_mm_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1); - ys->req_policy = ðtool_mm_nest; - yrs.yarg.rsp_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_mm_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_MM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_mm_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MM_GET - dump */ -void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp) -{ - struct ethtool_mm_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_mm_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_mm_get_list * -ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_mm_get_list); - yds.cb = ethtool_mm_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_MM_GET; - yds.rsp_policy = ðtool_mm_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1); - ys->req_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_mm_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_MM_GET - notify */ -void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_mm_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_MM_SET ============== */ -/* ETHTOOL_MSG_MM_SET - do */ -void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_SET, 1); - ys->req_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - if (req->_present.verify_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_VERIFY_ENABLED, req->verify_enabled); - if (req->_present.verify_time) - mnl_attr_put_u32(nlh, ETHTOOL_A_MM_VERIFY_TIME, req->verify_time); - if (req->_present.tx_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_TX_ENABLED, req->tx_enabled); - if (req->_present.pmac_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_PMAC_ENABLED, req->pmac_enabled); - if (req->_present.tx_min_frag_size) - mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ETHTOOL_MSG_CABLE_TEST_NTF - event */ -int ethtool_cable_test_ntf_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_cable_test_ntf_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_TEST_NTF_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_TEST_NTF_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */ -int ethtool_cable_test_tdr_ntf_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct ethtool_cable_test_tdr_ntf_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nest = 1; - - parg.rsp_policy = ðtool_cable_nest_nest; - parg.data = &dst->nest; - if (ethtool_cable_nest_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_cable_nest_free(&rsp->obj.nest); - free(rsp); -} - -static const struct ynl_ntf_info ethtool_ntf_info[] = { - [ETHTOOL_MSG_LINKINFO_NTF] = { - .alloc_sz = sizeof(struct ethtool_linkinfo_get_ntf), - .cb = ethtool_linkinfo_get_rsp_parse, - .policy = ðtool_linkinfo_nest, - .free = (void *)ethtool_linkinfo_get_ntf_free, - }, - [ETHTOOL_MSG_LINKMODES_NTF] = { - .alloc_sz = sizeof(struct ethtool_linkmodes_get_ntf), - .cb = ethtool_linkmodes_get_rsp_parse, - .policy = ðtool_linkmodes_nest, - .free = (void *)ethtool_linkmodes_get_ntf_free, - }, - [ETHTOOL_MSG_DEBUG_NTF] = { - .alloc_sz = sizeof(struct ethtool_debug_get_ntf), - .cb = ethtool_debug_get_rsp_parse, - .policy = ðtool_debug_nest, - .free = (void *)ethtool_debug_get_ntf_free, - }, - [ETHTOOL_MSG_WOL_NTF] = { - .alloc_sz = sizeof(struct ethtool_wol_get_ntf), - .cb = ethtool_wol_get_rsp_parse, - .policy = ðtool_wol_nest, - .free = (void *)ethtool_wol_get_ntf_free, - }, - [ETHTOOL_MSG_FEATURES_NTF] = { - .alloc_sz = sizeof(struct ethtool_features_get_ntf), - .cb = ethtool_features_get_rsp_parse, - .policy = ðtool_features_nest, - .free = (void *)ethtool_features_get_ntf_free, - }, - [ETHTOOL_MSG_PRIVFLAGS_NTF] = { - .alloc_sz = sizeof(struct ethtool_privflags_get_ntf), - .cb = ethtool_privflags_get_rsp_parse, - .policy = ðtool_privflags_nest, - .free = (void *)ethtool_privflags_get_ntf_free, - }, - [ETHTOOL_MSG_RINGS_NTF] = { - .alloc_sz = sizeof(struct ethtool_rings_get_ntf), - .cb = ethtool_rings_get_rsp_parse, - .policy = ðtool_rings_nest, - .free = (void *)ethtool_rings_get_ntf_free, - }, - [ETHTOOL_MSG_CHANNELS_NTF] = { - .alloc_sz = sizeof(struct ethtool_channels_get_ntf), - .cb = ethtool_channels_get_rsp_parse, - .policy = ðtool_channels_nest, - .free = (void *)ethtool_channels_get_ntf_free, - }, - [ETHTOOL_MSG_COALESCE_NTF] = { - .alloc_sz = sizeof(struct ethtool_coalesce_get_ntf), - .cb = ethtool_coalesce_get_rsp_parse, - .policy = ðtool_coalesce_nest, - .free = (void *)ethtool_coalesce_get_ntf_free, - }, - [ETHTOOL_MSG_PAUSE_NTF] = { - .alloc_sz = sizeof(struct ethtool_pause_get_ntf), - .cb = ethtool_pause_get_rsp_parse, - .policy = ðtool_pause_nest, - .free = (void *)ethtool_pause_get_ntf_free, - }, - [ETHTOOL_MSG_EEE_NTF] = { - .alloc_sz = sizeof(struct ethtool_eee_get_ntf), - .cb = ethtool_eee_get_rsp_parse, - .policy = ðtool_eee_nest, - .free = (void *)ethtool_eee_get_ntf_free, - }, - [ETHTOOL_MSG_CABLE_TEST_NTF] = { - .alloc_sz = sizeof(struct ethtool_cable_test_ntf), - .cb = ethtool_cable_test_ntf_rsp_parse, - .policy = ðtool_cable_test_ntf_nest, - .free = (void *)ethtool_cable_test_ntf_free, - }, - [ETHTOOL_MSG_CABLE_TEST_TDR_NTF] = { - .alloc_sz = sizeof(struct ethtool_cable_test_tdr_ntf), - .cb = ethtool_cable_test_tdr_ntf_rsp_parse, - .policy = ðtool_cable_test_tdr_ntf_nest, - .free = (void *)ethtool_cable_test_tdr_ntf_free, - }, - [ETHTOOL_MSG_FEC_NTF] = { - .alloc_sz = sizeof(struct ethtool_fec_get_ntf), - .cb = ethtool_fec_get_rsp_parse, - .policy = ðtool_fec_nest, - .free = (void *)ethtool_fec_get_ntf_free, - }, - [ETHTOOL_MSG_MODULE_NTF] = { - .alloc_sz = sizeof(struct ethtool_module_get_ntf), - .cb = ethtool_module_get_rsp_parse, - .policy = ðtool_module_nest, - .free = (void *)ethtool_module_get_ntf_free, - }, - [ETHTOOL_MSG_PLCA_NTF] = { - .alloc_sz = sizeof(struct ethtool_plca_get_cfg_ntf), - .cb = ethtool_plca_get_cfg_rsp_parse, - .policy = ðtool_plca_nest, - .free = (void *)ethtool_plca_get_cfg_ntf_free, - }, - [ETHTOOL_MSG_MM_NTF] = { - .alloc_sz = sizeof(struct ethtool_mm_get_ntf), - .cb = ethtool_mm_get_rsp_parse, - .policy = ðtool_mm_nest, - .free = (void *)ethtool_mm_get_ntf_free, - }, -}; - -const struct ynl_family ynl_ethtool_family = { - .name = "ethtool", - .ntf_info = ethtool_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(ethtool_ntf_info), -}; diff --git a/tools/net/ynl/generated/ethtool-user.h b/tools/net/ynl/generated/ethtool-user.h deleted file mode 100644 index ca0ec5fd7798..000000000000 --- a/tools/net/ynl/generated/ethtool-user.h +++ /dev/null @@ -1,5535 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/ethtool.yaml */ -/* YNL-GEN user header */ -/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */ - -#ifndef _LINUX_ETHTOOL_GEN_H -#define _LINUX_ETHTOOL_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_ethtool_family; - -/* Enums */ -const char *ethtool_op_str(int op); -const char *ethtool_udp_tunnel_type_str(int value); -const char *ethtool_stringset_str(enum ethtool_stringset value); - -/* Common nested types */ -struct ethtool_header { - struct { - __u32 dev_index:1; - __u32 dev_name_len; - __u32 flags:1; - } _present; - - __u32 dev_index; - char *dev_name; - __u32 flags; -}; - -struct ethtool_pause_stat { - struct { - __u32 tx_frames:1; - __u32 rx_frames:1; - } _present; - - __u64 tx_frames; - __u64 rx_frames; -}; - -struct ethtool_cable_test_tdr_cfg { - struct { - __u32 first:1; - __u32 last:1; - __u32 step:1; - __u32 pair:1; - } _present; - - __u32 first; - __u32 last; - __u32 step; - __u8 pair; -}; - -struct ethtool_fec_stat { - struct { - __u32 corrected_len; - __u32 uncorr_len; - __u32 corr_bits_len; - } _present; - - void *corrected; - void *uncorr; - void *corr_bits; -}; - -struct ethtool_mm_stat { - struct { - __u32 reassembly_errors:1; - __u32 smd_errors:1; - __u32 reassembly_ok:1; - __u32 rx_frag_count:1; - __u32 tx_frag_count:1; - __u32 hold_count:1; - } _present; - - __u64 reassembly_errors; - __u64 smd_errors; - __u64 reassembly_ok; - __u64 rx_frag_count; - __u64 tx_frag_count; - __u64 hold_count; -}; - -struct ethtool_cable_result { - struct { - __u32 pair:1; - __u32 code:1; - } _present; - - __u8 pair; - __u8 code; -}; - -struct ethtool_cable_fault_length { - struct { - __u32 pair:1; - __u32 cm:1; - } _present; - - __u8 pair; - __u32 cm; -}; - -struct ethtool_bitset_bit { - struct { - __u32 index:1; - __u32 name_len; - __u32 value:1; - } _present; - - __u32 index; - char *name; -}; - -struct ethtool_tunnel_udp_entry { - struct { - __u32 port:1; - __u32 type:1; - } _present; - - __u16 port /* big-endian */; - __u32 type; -}; - -struct ethtool_string { - struct { - __u32 index:1; - __u32 value_len; - } _present; - - __u32 index; - char *value; -}; - -struct ethtool_cable_nest { - struct { - __u32 result:1; - __u32 fault_length:1; - } _present; - - struct ethtool_cable_result result; - struct ethtool_cable_fault_length fault_length; -}; - -struct ethtool_bitset_bits { - unsigned int n_bit; - struct ethtool_bitset_bit *bit; -}; - -struct ethtool_strings { - unsigned int n_string; - struct ethtool_string *string; -}; - -struct ethtool_bitset { - struct { - __u32 nomask:1; - __u32 size:1; - __u32 bits:1; - } _present; - - __u32 size; - struct ethtool_bitset_bits bits; -}; - -struct ethtool_stringset_ { - struct { - __u32 id:1; - __u32 count:1; - } _present; - - __u32 id; - __u32 count; - unsigned int n_strings; - struct ethtool_strings *strings; -}; - -struct ethtool_tunnel_udp_table { - struct { - __u32 size:1; - __u32 types:1; - } _present; - - __u32 size; - struct ethtool_bitset types; - unsigned int n_entry; - struct ethtool_tunnel_udp_entry *entry; -}; - -struct ethtool_stringsets { - unsigned int n_stringset; - struct ethtool_stringset_ *stringset; -}; - -struct ethtool_tunnel_udp { - struct { - __u32 table:1; - } _present; - - struct ethtool_tunnel_udp_table table; -}; - -/* ============== ETHTOOL_MSG_STRSET_GET ============== */ -/* ETHTOOL_MSG_STRSET_GET - do */ -struct ethtool_strset_get_req { - struct { - __u32 header:1; - __u32 stringsets:1; - __u32 counts_only:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -static inline struct ethtool_strset_get_req *ethtool_strset_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_strset_get_req)); -} -void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req); - -static inline void -ethtool_strset_get_req_set_header_dev_index(struct ethtool_strset_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_strset_get_req_set_header_dev_name(struct ethtool_strset_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_strset_get_req_set_header_flags(struct ethtool_strset_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -__ethtool_strset_get_req_set_stringsets_stringset(struct ethtool_strset_get_req *req, - struct ethtool_stringset_ *stringset, - unsigned int n_stringset) -{ - free(req->stringsets.stringset); - req->stringsets.stringset = stringset; - req->stringsets.n_stringset = n_stringset; -} -static inline void -ethtool_strset_get_req_set_counts_only(struct ethtool_strset_get_req *req) -{ - req->_present.counts_only = 1; -} - -struct ethtool_strset_get_rsp { - struct { - __u32 header:1; - __u32 stringsets:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp); - -/* - * Get string set from the kernel. - */ -struct ethtool_strset_get_rsp * -ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req); - -/* ETHTOOL_MSG_STRSET_GET - dump */ -struct ethtool_strset_get_req_dump { - struct { - __u32 header:1; - __u32 stringsets:1; - __u32 counts_only:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -static inline struct ethtool_strset_get_req_dump * -ethtool_strset_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_strset_get_req_dump)); -} -void ethtool_strset_get_req_dump_free(struct ethtool_strset_get_req_dump *req); - -static inline void -ethtool_strset_get_req_dump_set_header_dev_index(struct ethtool_strset_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_strset_get_req_dump_set_header_dev_name(struct ethtool_strset_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_strset_get_req_dump_set_header_flags(struct ethtool_strset_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -__ethtool_strset_get_req_dump_set_stringsets_stringset(struct ethtool_strset_get_req_dump *req, - struct ethtool_stringset_ *stringset, - unsigned int n_stringset) -{ - free(req->stringsets.stringset); - req->stringsets.stringset = stringset; - req->stringsets.n_stringset = n_stringset; -} -static inline void -ethtool_strset_get_req_dump_set_counts_only(struct ethtool_strset_get_req_dump *req) -{ - req->_present.counts_only = 1; -} - -struct ethtool_strset_get_list { - struct ethtool_strset_get_list *next; - struct ethtool_strset_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp); - -struct ethtool_strset_get_list * -ethtool_strset_get_dump(struct ynl_sock *ys, - struct ethtool_strset_get_req_dump *req); - -/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */ -/* ETHTOOL_MSG_LINKINFO_GET - do */ -struct ethtool_linkinfo_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkinfo_get_req * -ethtool_linkinfo_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_get_req)); -} -void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req); - -static inline void -ethtool_linkinfo_get_req_set_header_dev_index(struct ethtool_linkinfo_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkinfo_get_req_set_header_dev_name(struct ethtool_linkinfo_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_get_req_set_header_flags(struct ethtool_linkinfo_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkinfo_get_rsp { - struct { - __u32 header:1; - __u32 port:1; - __u32 phyaddr:1; - __u32 tp_mdix:1; - __u32 tp_mdix_ctrl:1; - __u32 transceiver:1; - } _present; - - struct ethtool_header header; - __u8 port; - __u8 phyaddr; - __u8 tp_mdix; - __u8 tp_mdix_ctrl; - __u8 transceiver; -}; - -void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp); - -/* - * Get link info. - */ -struct ethtool_linkinfo_get_rsp * -ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req); - -/* ETHTOOL_MSG_LINKINFO_GET - dump */ -struct ethtool_linkinfo_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkinfo_get_req_dump * -ethtool_linkinfo_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_get_req_dump)); -} -void -ethtool_linkinfo_get_req_dump_free(struct ethtool_linkinfo_get_req_dump *req); - -static inline void -ethtool_linkinfo_get_req_dump_set_header_dev_index(struct ethtool_linkinfo_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkinfo_get_req_dump_set_header_dev_name(struct ethtool_linkinfo_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_get_req_dump_set_header_flags(struct ethtool_linkinfo_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkinfo_get_list { - struct ethtool_linkinfo_get_list *next; - struct ethtool_linkinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp); - -struct ethtool_linkinfo_get_list * -ethtool_linkinfo_get_dump(struct ynl_sock *ys, - struct ethtool_linkinfo_get_req_dump *req); - -/* ETHTOOL_MSG_LINKINFO_GET - notify */ -struct ethtool_linkinfo_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_linkinfo_get_ntf *ntf); - struct ethtool_linkinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */ -/* ETHTOOL_MSG_LINKINFO_SET - do */ -struct ethtool_linkinfo_set_req { - struct { - __u32 header:1; - __u32 port:1; - __u32 phyaddr:1; - __u32 tp_mdix:1; - __u32 tp_mdix_ctrl:1; - __u32 transceiver:1; - } _present; - - struct ethtool_header header; - __u8 port; - __u8 phyaddr; - __u8 tp_mdix; - __u8 tp_mdix_ctrl; - __u8 transceiver; -}; - -static inline struct ethtool_linkinfo_set_req * -ethtool_linkinfo_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_set_req)); -} -void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req); - -static inline void -ethtool_linkinfo_set_req_set_header_dev_index(struct ethtool_linkinfo_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkinfo_set_req_set_header_dev_name(struct ethtool_linkinfo_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_set_req_set_header_flags(struct ethtool_linkinfo_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_linkinfo_set_req_set_port(struct ethtool_linkinfo_set_req *req, - __u8 port) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -ethtool_linkinfo_set_req_set_phyaddr(struct ethtool_linkinfo_set_req *req, - __u8 phyaddr) -{ - req->_present.phyaddr = 1; - req->phyaddr = phyaddr; -} -static inline void -ethtool_linkinfo_set_req_set_tp_mdix(struct ethtool_linkinfo_set_req *req, - __u8 tp_mdix) -{ - req->_present.tp_mdix = 1; - req->tp_mdix = tp_mdix; -} -static inline void -ethtool_linkinfo_set_req_set_tp_mdix_ctrl(struct ethtool_linkinfo_set_req *req, - __u8 tp_mdix_ctrl) -{ - req->_present.tp_mdix_ctrl = 1; - req->tp_mdix_ctrl = tp_mdix_ctrl; -} -static inline void -ethtool_linkinfo_set_req_set_transceiver(struct ethtool_linkinfo_set_req *req, - __u8 transceiver) -{ - req->_present.transceiver = 1; - req->transceiver = transceiver; -} - -/* - * Set link info. - */ -int ethtool_linkinfo_set(struct ynl_sock *ys, - struct ethtool_linkinfo_set_req *req); - -/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */ -/* ETHTOOL_MSG_LINKMODES_GET - do */ -struct ethtool_linkmodes_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkmodes_get_req * -ethtool_linkmodes_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_get_req)); -} -void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req); - -static inline void -ethtool_linkmodes_get_req_set_header_dev_index(struct ethtool_linkmodes_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_get_req_set_header_dev_name(struct ethtool_linkmodes_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_get_req_set_header_flags(struct ethtool_linkmodes_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkmodes_get_rsp { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 ours:1; - __u32 peer:1; - __u32 speed:1; - __u32 duplex:1; - __u32 master_slave_cfg:1; - __u32 master_slave_state:1; - __u32 lanes:1; - __u32 rate_matching:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - struct ethtool_bitset ours; - struct ethtool_bitset peer; - __u32 speed; - __u8 duplex; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u32 lanes; - __u8 rate_matching; -}; - -void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp); - -/* - * Get link modes. - */ -struct ethtool_linkmodes_get_rsp * -ethtool_linkmodes_get(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req *req); - -/* ETHTOOL_MSG_LINKMODES_GET - dump */ -struct ethtool_linkmodes_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkmodes_get_req_dump * -ethtool_linkmodes_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_get_req_dump)); -} -void -ethtool_linkmodes_get_req_dump_free(struct ethtool_linkmodes_get_req_dump *req); - -static inline void -ethtool_linkmodes_get_req_dump_set_header_dev_index(struct ethtool_linkmodes_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_get_req_dump_set_header_dev_name(struct ethtool_linkmodes_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_get_req_dump_set_header_flags(struct ethtool_linkmodes_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkmodes_get_list { - struct ethtool_linkmodes_get_list *next; - struct ethtool_linkmodes_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp); - -struct ethtool_linkmodes_get_list * -ethtool_linkmodes_get_dump(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req_dump *req); - -/* ETHTOOL_MSG_LINKMODES_GET - notify */ -struct ethtool_linkmodes_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_linkmodes_get_ntf *ntf); - struct ethtool_linkmodes_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */ -/* ETHTOOL_MSG_LINKMODES_SET - do */ -struct ethtool_linkmodes_set_req { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 ours:1; - __u32 peer:1; - __u32 speed:1; - __u32 duplex:1; - __u32 master_slave_cfg:1; - __u32 master_slave_state:1; - __u32 lanes:1; - __u32 rate_matching:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - struct ethtool_bitset ours; - struct ethtool_bitset peer; - __u32 speed; - __u8 duplex; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u32 lanes; - __u8 rate_matching; -}; - -static inline struct ethtool_linkmodes_set_req * -ethtool_linkmodes_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_set_req)); -} -void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req); - -static inline void -ethtool_linkmodes_set_req_set_header_dev_index(struct ethtool_linkmodes_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_set_req_set_header_dev_name(struct ethtool_linkmodes_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_set_req_set_header_flags(struct ethtool_linkmodes_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_linkmodes_set_req_set_autoneg(struct ethtool_linkmodes_set_req *req, - __u8 autoneg) -{ - req->_present.autoneg = 1; - req->autoneg = autoneg; -} -static inline void -ethtool_linkmodes_set_req_set_ours_nomask(struct ethtool_linkmodes_set_req *req) -{ - req->_present.ours = 1; - req->ours._present.nomask = 1; -} -static inline void -ethtool_linkmodes_set_req_set_ours_size(struct ethtool_linkmodes_set_req *req, - __u32 size) -{ - req->_present.ours = 1; - req->ours._present.size = 1; - req->ours.size = size; -} -static inline void -__ethtool_linkmodes_set_req_set_ours_bits_bit(struct ethtool_linkmodes_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->ours.bits.bit); - req->ours.bits.bit = bit; - req->ours.bits.n_bit = n_bit; -} -static inline void -ethtool_linkmodes_set_req_set_peer_nomask(struct ethtool_linkmodes_set_req *req) -{ - req->_present.peer = 1; - req->peer._present.nomask = 1; -} -static inline void -ethtool_linkmodes_set_req_set_peer_size(struct ethtool_linkmodes_set_req *req, - __u32 size) -{ - req->_present.peer = 1; - req->peer._present.size = 1; - req->peer.size = size; -} -static inline void -__ethtool_linkmodes_set_req_set_peer_bits_bit(struct ethtool_linkmodes_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->peer.bits.bit); - req->peer.bits.bit = bit; - req->peer.bits.n_bit = n_bit; -} -static inline void -ethtool_linkmodes_set_req_set_speed(struct ethtool_linkmodes_set_req *req, - __u32 speed) -{ - req->_present.speed = 1; - req->speed = speed; -} -static inline void -ethtool_linkmodes_set_req_set_duplex(struct ethtool_linkmodes_set_req *req, - __u8 duplex) -{ - req->_present.duplex = 1; - req->duplex = duplex; -} -static inline void -ethtool_linkmodes_set_req_set_master_slave_cfg(struct ethtool_linkmodes_set_req *req, - __u8 master_slave_cfg) -{ - req->_present.master_slave_cfg = 1; - req->master_slave_cfg = master_slave_cfg; -} -static inline void -ethtool_linkmodes_set_req_set_master_slave_state(struct ethtool_linkmodes_set_req *req, - __u8 master_slave_state) -{ - req->_present.master_slave_state = 1; - req->master_slave_state = master_slave_state; -} -static inline void -ethtool_linkmodes_set_req_set_lanes(struct ethtool_linkmodes_set_req *req, - __u32 lanes) -{ - req->_present.lanes = 1; - req->lanes = lanes; -} -static inline void -ethtool_linkmodes_set_req_set_rate_matching(struct ethtool_linkmodes_set_req *req, - __u8 rate_matching) -{ - req->_present.rate_matching = 1; - req->rate_matching = rate_matching; -} - -/* - * Set link modes. - */ -int ethtool_linkmodes_set(struct ynl_sock *ys, - struct ethtool_linkmodes_set_req *req); - -/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */ -/* ETHTOOL_MSG_LINKSTATE_GET - do */ -struct ethtool_linkstate_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkstate_get_req * -ethtool_linkstate_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkstate_get_req)); -} -void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req); - -static inline void -ethtool_linkstate_get_req_set_header_dev_index(struct ethtool_linkstate_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkstate_get_req_set_header_dev_name(struct ethtool_linkstate_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkstate_get_req_set_header_flags(struct ethtool_linkstate_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkstate_get_rsp { - struct { - __u32 header:1; - __u32 link:1; - __u32 sqi:1; - __u32 sqi_max:1; - __u32 ext_state:1; - __u32 ext_substate:1; - __u32 ext_down_cnt:1; - } _present; - - struct ethtool_header header; - __u8 link; - __u32 sqi; - __u32 sqi_max; - __u8 ext_state; - __u8 ext_substate; - __u32 ext_down_cnt; -}; - -void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp); - -/* - * Get link state. - */ -struct ethtool_linkstate_get_rsp * -ethtool_linkstate_get(struct ynl_sock *ys, - struct ethtool_linkstate_get_req *req); - -/* ETHTOOL_MSG_LINKSTATE_GET - dump */ -struct ethtool_linkstate_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkstate_get_req_dump * -ethtool_linkstate_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkstate_get_req_dump)); -} -void -ethtool_linkstate_get_req_dump_free(struct ethtool_linkstate_get_req_dump *req); - -static inline void -ethtool_linkstate_get_req_dump_set_header_dev_index(struct ethtool_linkstate_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkstate_get_req_dump_set_header_dev_name(struct ethtool_linkstate_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkstate_get_req_dump_set_header_flags(struct ethtool_linkstate_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkstate_get_list { - struct ethtool_linkstate_get_list *next; - struct ethtool_linkstate_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp); - -struct ethtool_linkstate_get_list * -ethtool_linkstate_get_dump(struct ynl_sock *ys, - struct ethtool_linkstate_get_req_dump *req); - -/* ============== ETHTOOL_MSG_DEBUG_GET ============== */ -/* ETHTOOL_MSG_DEBUG_GET - do */ -struct ethtool_debug_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_debug_get_req *ethtool_debug_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_get_req)); -} -void ethtool_debug_get_req_free(struct ethtool_debug_get_req *req); - -static inline void -ethtool_debug_get_req_set_header_dev_index(struct ethtool_debug_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_get_req_set_header_dev_name(struct ethtool_debug_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_get_req_set_header_flags(struct ethtool_debug_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_debug_get_rsp { - struct { - __u32 header:1; - __u32 msgmask:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset msgmask; -}; - -void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp); - -/* - * Get debug message mask. - */ -struct ethtool_debug_get_rsp * -ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req); - -/* ETHTOOL_MSG_DEBUG_GET - dump */ -struct ethtool_debug_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_debug_get_req_dump * -ethtool_debug_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_get_req_dump)); -} -void ethtool_debug_get_req_dump_free(struct ethtool_debug_get_req_dump *req); - -static inline void -ethtool_debug_get_req_dump_set_header_dev_index(struct ethtool_debug_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_get_req_dump_set_header_dev_name(struct ethtool_debug_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_get_req_dump_set_header_flags(struct ethtool_debug_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_debug_get_list { - struct ethtool_debug_get_list *next; - struct ethtool_debug_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp); - -struct ethtool_debug_get_list * -ethtool_debug_get_dump(struct ynl_sock *ys, - struct ethtool_debug_get_req_dump *req); - -/* ETHTOOL_MSG_DEBUG_GET - notify */ -struct ethtool_debug_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_debug_get_ntf *ntf); - struct ethtool_debug_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_DEBUG_SET ============== */ -/* ETHTOOL_MSG_DEBUG_SET - do */ -struct ethtool_debug_set_req { - struct { - __u32 header:1; - __u32 msgmask:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset msgmask; -}; - -static inline struct ethtool_debug_set_req *ethtool_debug_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_set_req)); -} -void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req); - -static inline void -ethtool_debug_set_req_set_header_dev_index(struct ethtool_debug_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_set_req_set_header_dev_name(struct ethtool_debug_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_set_req_set_header_flags(struct ethtool_debug_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_debug_set_req_set_msgmask_nomask(struct ethtool_debug_set_req *req) -{ - req->_present.msgmask = 1; - req->msgmask._present.nomask = 1; -} -static inline void -ethtool_debug_set_req_set_msgmask_size(struct ethtool_debug_set_req *req, - __u32 size) -{ - req->_present.msgmask = 1; - req->msgmask._present.size = 1; - req->msgmask.size = size; -} -static inline void -__ethtool_debug_set_req_set_msgmask_bits_bit(struct ethtool_debug_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->msgmask.bits.bit); - req->msgmask.bits.bit = bit; - req->msgmask.bits.n_bit = n_bit; -} - -/* - * Set debug message mask. - */ -int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req); - -/* ============== ETHTOOL_MSG_WOL_GET ============== */ -/* ETHTOOL_MSG_WOL_GET - do */ -struct ethtool_wol_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_wol_get_req *ethtool_wol_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_get_req)); -} -void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req); - -static inline void -ethtool_wol_get_req_set_header_dev_index(struct ethtool_wol_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_get_req_set_header_dev_name(struct ethtool_wol_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_get_req_set_header_flags(struct ethtool_wol_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_wol_get_rsp { - struct { - __u32 header:1; - __u32 modes:1; - __u32 sopass_len; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - void *sopass; -}; - -void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp); - -/* - * Get WOL params. - */ -struct ethtool_wol_get_rsp * -ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req); - -/* ETHTOOL_MSG_WOL_GET - dump */ -struct ethtool_wol_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_wol_get_req_dump * -ethtool_wol_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_get_req_dump)); -} -void ethtool_wol_get_req_dump_free(struct ethtool_wol_get_req_dump *req); - -static inline void -ethtool_wol_get_req_dump_set_header_dev_index(struct ethtool_wol_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_get_req_dump_set_header_dev_name(struct ethtool_wol_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_get_req_dump_set_header_flags(struct ethtool_wol_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_wol_get_list { - struct ethtool_wol_get_list *next; - struct ethtool_wol_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp); - -struct ethtool_wol_get_list * -ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req); - -/* ETHTOOL_MSG_WOL_GET - notify */ -struct ethtool_wol_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_wol_get_ntf *ntf); - struct ethtool_wol_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_WOL_SET ============== */ -/* ETHTOOL_MSG_WOL_SET - do */ -struct ethtool_wol_set_req { - struct { - __u32 header:1; - __u32 modes:1; - __u32 sopass_len; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - void *sopass; -}; - -static inline struct ethtool_wol_set_req *ethtool_wol_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_set_req)); -} -void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req); - -static inline void -ethtool_wol_set_req_set_header_dev_index(struct ethtool_wol_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_set_req_set_header_dev_name(struct ethtool_wol_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_set_req_set_header_flags(struct ethtool_wol_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_wol_set_req_set_modes_nomask(struct ethtool_wol_set_req *req) -{ - req->_present.modes = 1; - req->modes._present.nomask = 1; -} -static inline void -ethtool_wol_set_req_set_modes_size(struct ethtool_wol_set_req *req, __u32 size) -{ - req->_present.modes = 1; - req->modes._present.size = 1; - req->modes.size = size; -} -static inline void -__ethtool_wol_set_req_set_modes_bits_bit(struct ethtool_wol_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes.bits.bit); - req->modes.bits.bit = bit; - req->modes.bits.n_bit = n_bit; -} -static inline void -ethtool_wol_set_req_set_sopass(struct ethtool_wol_set_req *req, - const void *sopass, size_t len) -{ - free(req->sopass); - req->_present.sopass_len = len; - req->sopass = malloc(req->_present.sopass_len); - memcpy(req->sopass, sopass, req->_present.sopass_len); -} - -/* - * Set WOL params. - */ -int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req); - -/* ============== ETHTOOL_MSG_FEATURES_GET ============== */ -/* ETHTOOL_MSG_FEATURES_GET - do */ -struct ethtool_features_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_features_get_req * -ethtool_features_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_get_req)); -} -void ethtool_features_get_req_free(struct ethtool_features_get_req *req); - -static inline void -ethtool_features_get_req_set_header_dev_index(struct ethtool_features_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_get_req_set_header_dev_name(struct ethtool_features_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_get_req_set_header_flags(struct ethtool_features_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_features_get_rsp { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp); - -/* - * Get features. - */ -struct ethtool_features_get_rsp * -ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req); - -/* ETHTOOL_MSG_FEATURES_GET - dump */ -struct ethtool_features_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_features_get_req_dump * -ethtool_features_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_get_req_dump)); -} -void -ethtool_features_get_req_dump_free(struct ethtool_features_get_req_dump *req); - -static inline void -ethtool_features_get_req_dump_set_header_dev_index(struct ethtool_features_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_get_req_dump_set_header_dev_name(struct ethtool_features_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_get_req_dump_set_header_flags(struct ethtool_features_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_features_get_list { - struct ethtool_features_get_list *next; - struct ethtool_features_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp); - -struct ethtool_features_get_list * -ethtool_features_get_dump(struct ynl_sock *ys, - struct ethtool_features_get_req_dump *req); - -/* ETHTOOL_MSG_FEATURES_GET - notify */ -struct ethtool_features_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_features_get_ntf *ntf); - struct ethtool_features_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_FEATURES_SET ============== */ -/* ETHTOOL_MSG_FEATURES_SET - do */ -struct ethtool_features_set_req { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -static inline struct ethtool_features_set_req * -ethtool_features_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_set_req)); -} -void ethtool_features_set_req_free(struct ethtool_features_set_req *req); - -static inline void -ethtool_features_set_req_set_header_dev_index(struct ethtool_features_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_set_req_set_header_dev_name(struct ethtool_features_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_set_req_set_header_flags(struct ethtool_features_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_features_set_req_set_hw_nomask(struct ethtool_features_set_req *req) -{ - req->_present.hw = 1; - req->hw._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_hw_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.hw = 1; - req->hw._present.size = 1; - req->hw.size = size; -} -static inline void -__ethtool_features_set_req_set_hw_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->hw.bits.bit); - req->hw.bits.bit = bit; - req->hw.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_wanted_nomask(struct ethtool_features_set_req *req) -{ - req->_present.wanted = 1; - req->wanted._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_wanted_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.wanted = 1; - req->wanted._present.size = 1; - req->wanted.size = size; -} -static inline void -__ethtool_features_set_req_set_wanted_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->wanted.bits.bit); - req->wanted.bits.bit = bit; - req->wanted.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_active_nomask(struct ethtool_features_set_req *req) -{ - req->_present.active = 1; - req->active._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_active_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.active = 1; - req->active._present.size = 1; - req->active.size = size; -} -static inline void -__ethtool_features_set_req_set_active_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->active.bits.bit); - req->active.bits.bit = bit; - req->active.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_nochange_nomask(struct ethtool_features_set_req *req) -{ - req->_present.nochange = 1; - req->nochange._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_nochange_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.nochange = 1; - req->nochange._present.size = 1; - req->nochange.size = size; -} -static inline void -__ethtool_features_set_req_set_nochange_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->nochange.bits.bit); - req->nochange.bits.bit = bit; - req->nochange.bits.n_bit = n_bit; -} - -struct ethtool_features_set_rsp { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp); - -/* - * Set features. - */ -struct ethtool_features_set_rsp * -ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req); - -/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_GET - do */ -struct ethtool_privflags_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_privflags_get_req * -ethtool_privflags_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_get_req)); -} -void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req); - -static inline void -ethtool_privflags_get_req_set_header_dev_index(struct ethtool_privflags_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_get_req_set_header_dev_name(struct ethtool_privflags_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_get_req_set_header_flags(struct ethtool_privflags_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_privflags_get_rsp { - struct { - __u32 header:1; - __u32 flags:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset flags; -}; - -void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp); - -/* - * Get device private flags. - */ -struct ethtool_privflags_get_rsp * -ethtool_privflags_get(struct ynl_sock *ys, - struct ethtool_privflags_get_req *req); - -/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */ -struct ethtool_privflags_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_privflags_get_req_dump * -ethtool_privflags_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_get_req_dump)); -} -void -ethtool_privflags_get_req_dump_free(struct ethtool_privflags_get_req_dump *req); - -static inline void -ethtool_privflags_get_req_dump_set_header_dev_index(struct ethtool_privflags_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_get_req_dump_set_header_dev_name(struct ethtool_privflags_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_get_req_dump_set_header_flags(struct ethtool_privflags_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_privflags_get_list { - struct ethtool_privflags_get_list *next; - struct ethtool_privflags_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp); - -struct ethtool_privflags_get_list * -ethtool_privflags_get_dump(struct ynl_sock *ys, - struct ethtool_privflags_get_req_dump *req); - -/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */ -struct ethtool_privflags_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_privflags_get_ntf *ntf); - struct ethtool_privflags_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_SET - do */ -struct ethtool_privflags_set_req { - struct { - __u32 header:1; - __u32 flags:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset flags; -}; - -static inline struct ethtool_privflags_set_req * -ethtool_privflags_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_set_req)); -} -void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req); - -static inline void -ethtool_privflags_set_req_set_header_dev_index(struct ethtool_privflags_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_set_req_set_header_dev_name(struct ethtool_privflags_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_set_req_set_header_flags(struct ethtool_privflags_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_privflags_set_req_set_flags_nomask(struct ethtool_privflags_set_req *req) -{ - req->_present.flags = 1; - req->flags._present.nomask = 1; -} -static inline void -ethtool_privflags_set_req_set_flags_size(struct ethtool_privflags_set_req *req, - __u32 size) -{ - req->_present.flags = 1; - req->flags._present.size = 1; - req->flags.size = size; -} -static inline void -__ethtool_privflags_set_req_set_flags_bits_bit(struct ethtool_privflags_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->flags.bits.bit); - req->flags.bits.bit = bit; - req->flags.bits.n_bit = n_bit; -} - -/* - * Set device private flags. - */ -int ethtool_privflags_set(struct ynl_sock *ys, - struct ethtool_privflags_set_req *req); - -/* ============== ETHTOOL_MSG_RINGS_GET ============== */ -/* ETHTOOL_MSG_RINGS_GET - do */ -struct ethtool_rings_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rings_get_req *ethtool_rings_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_get_req)); -} -void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req); - -static inline void -ethtool_rings_get_req_set_header_dev_index(struct ethtool_rings_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_get_req_set_header_dev_name(struct ethtool_rings_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_get_req_set_header_flags(struct ethtool_rings_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rings_get_rsp { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 rx_mini_max:1; - __u32 rx_jumbo_max:1; - __u32 tx_max:1; - __u32 rx:1; - __u32 rx_mini:1; - __u32 rx_jumbo:1; - __u32 tx:1; - __u32 rx_buf_len:1; - __u32 tcp_data_split:1; - __u32 cqe_size:1; - __u32 tx_push:1; - __u32 rx_push:1; - __u32 tx_push_buf_len:1; - __u32 tx_push_buf_len_max:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 rx_mini_max; - __u32 rx_jumbo_max; - __u32 tx_max; - __u32 rx; - __u32 rx_mini; - __u32 rx_jumbo; - __u32 tx; - __u32 rx_buf_len; - __u8 tcp_data_split; - __u32 cqe_size; - __u8 tx_push; - __u8 rx_push; - __u32 tx_push_buf_len; - __u32 tx_push_buf_len_max; -}; - -void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp); - -/* - * Get ring params. - */ -struct ethtool_rings_get_rsp * -ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req); - -/* ETHTOOL_MSG_RINGS_GET - dump */ -struct ethtool_rings_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rings_get_req_dump * -ethtool_rings_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_get_req_dump)); -} -void ethtool_rings_get_req_dump_free(struct ethtool_rings_get_req_dump *req); - -static inline void -ethtool_rings_get_req_dump_set_header_dev_index(struct ethtool_rings_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_get_req_dump_set_header_dev_name(struct ethtool_rings_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_get_req_dump_set_header_flags(struct ethtool_rings_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rings_get_list { - struct ethtool_rings_get_list *next; - struct ethtool_rings_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp); - -struct ethtool_rings_get_list * -ethtool_rings_get_dump(struct ynl_sock *ys, - struct ethtool_rings_get_req_dump *req); - -/* ETHTOOL_MSG_RINGS_GET - notify */ -struct ethtool_rings_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_rings_get_ntf *ntf); - struct ethtool_rings_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_RINGS_SET ============== */ -/* ETHTOOL_MSG_RINGS_SET - do */ -struct ethtool_rings_set_req { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 rx_mini_max:1; - __u32 rx_jumbo_max:1; - __u32 tx_max:1; - __u32 rx:1; - __u32 rx_mini:1; - __u32 rx_jumbo:1; - __u32 tx:1; - __u32 rx_buf_len:1; - __u32 tcp_data_split:1; - __u32 cqe_size:1; - __u32 tx_push:1; - __u32 rx_push:1; - __u32 tx_push_buf_len:1; - __u32 tx_push_buf_len_max:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 rx_mini_max; - __u32 rx_jumbo_max; - __u32 tx_max; - __u32 rx; - __u32 rx_mini; - __u32 rx_jumbo; - __u32 tx; - __u32 rx_buf_len; - __u8 tcp_data_split; - __u32 cqe_size; - __u8 tx_push; - __u8 rx_push; - __u32 tx_push_buf_len; - __u32 tx_push_buf_len_max; -}; - -static inline struct ethtool_rings_set_req *ethtool_rings_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_set_req)); -} -void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req); - -static inline void -ethtool_rings_set_req_set_header_dev_index(struct ethtool_rings_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_set_req_set_header_dev_name(struct ethtool_rings_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_set_req_set_header_flags(struct ethtool_rings_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_rings_set_req_set_rx_max(struct ethtool_rings_set_req *req, - __u32 rx_max) -{ - req->_present.rx_max = 1; - req->rx_max = rx_max; -} -static inline void -ethtool_rings_set_req_set_rx_mini_max(struct ethtool_rings_set_req *req, - __u32 rx_mini_max) -{ - req->_present.rx_mini_max = 1; - req->rx_mini_max = rx_mini_max; -} -static inline void -ethtool_rings_set_req_set_rx_jumbo_max(struct ethtool_rings_set_req *req, - __u32 rx_jumbo_max) -{ - req->_present.rx_jumbo_max = 1; - req->rx_jumbo_max = rx_jumbo_max; -} -static inline void -ethtool_rings_set_req_set_tx_max(struct ethtool_rings_set_req *req, - __u32 tx_max) -{ - req->_present.tx_max = 1; - req->tx_max = tx_max; -} -static inline void -ethtool_rings_set_req_set_rx(struct ethtool_rings_set_req *req, __u32 rx) -{ - req->_present.rx = 1; - req->rx = rx; -} -static inline void -ethtool_rings_set_req_set_rx_mini(struct ethtool_rings_set_req *req, - __u32 rx_mini) -{ - req->_present.rx_mini = 1; - req->rx_mini = rx_mini; -} -static inline void -ethtool_rings_set_req_set_rx_jumbo(struct ethtool_rings_set_req *req, - __u32 rx_jumbo) -{ - req->_present.rx_jumbo = 1; - req->rx_jumbo = rx_jumbo; -} -static inline void -ethtool_rings_set_req_set_tx(struct ethtool_rings_set_req *req, __u32 tx) -{ - req->_present.tx = 1; - req->tx = tx; -} -static inline void -ethtool_rings_set_req_set_rx_buf_len(struct ethtool_rings_set_req *req, - __u32 rx_buf_len) -{ - req->_present.rx_buf_len = 1; - req->rx_buf_len = rx_buf_len; -} -static inline void -ethtool_rings_set_req_set_tcp_data_split(struct ethtool_rings_set_req *req, - __u8 tcp_data_split) -{ - req->_present.tcp_data_split = 1; - req->tcp_data_split = tcp_data_split; -} -static inline void -ethtool_rings_set_req_set_cqe_size(struct ethtool_rings_set_req *req, - __u32 cqe_size) -{ - req->_present.cqe_size = 1; - req->cqe_size = cqe_size; -} -static inline void -ethtool_rings_set_req_set_tx_push(struct ethtool_rings_set_req *req, - __u8 tx_push) -{ - req->_present.tx_push = 1; - req->tx_push = tx_push; -} -static inline void -ethtool_rings_set_req_set_rx_push(struct ethtool_rings_set_req *req, - __u8 rx_push) -{ - req->_present.rx_push = 1; - req->rx_push = rx_push; -} -static inline void -ethtool_rings_set_req_set_tx_push_buf_len(struct ethtool_rings_set_req *req, - __u32 tx_push_buf_len) -{ - req->_present.tx_push_buf_len = 1; - req->tx_push_buf_len = tx_push_buf_len; -} -static inline void -ethtool_rings_set_req_set_tx_push_buf_len_max(struct ethtool_rings_set_req *req, - __u32 tx_push_buf_len_max) -{ - req->_present.tx_push_buf_len_max = 1; - req->tx_push_buf_len_max = tx_push_buf_len_max; -} - -/* - * Set ring params. - */ -int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req); - -/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */ -/* ETHTOOL_MSG_CHANNELS_GET - do */ -struct ethtool_channels_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_channels_get_req * -ethtool_channels_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_get_req)); -} -void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req); - -static inline void -ethtool_channels_get_req_set_header_dev_index(struct ethtool_channels_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_get_req_set_header_dev_name(struct ethtool_channels_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_get_req_set_header_flags(struct ethtool_channels_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_channels_get_rsp { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 tx_max:1; - __u32 other_max:1; - __u32 combined_max:1; - __u32 rx_count:1; - __u32 tx_count:1; - __u32 other_count:1; - __u32 combined_count:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 tx_max; - __u32 other_max; - __u32 combined_max; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp); - -/* - * Get channel params. - */ -struct ethtool_channels_get_rsp * -ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req); - -/* ETHTOOL_MSG_CHANNELS_GET - dump */ -struct ethtool_channels_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_channels_get_req_dump * -ethtool_channels_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_get_req_dump)); -} -void -ethtool_channels_get_req_dump_free(struct ethtool_channels_get_req_dump *req); - -static inline void -ethtool_channels_get_req_dump_set_header_dev_index(struct ethtool_channels_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_get_req_dump_set_header_dev_name(struct ethtool_channels_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_get_req_dump_set_header_flags(struct ethtool_channels_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_channels_get_list { - struct ethtool_channels_get_list *next; - struct ethtool_channels_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp); - -struct ethtool_channels_get_list * -ethtool_channels_get_dump(struct ynl_sock *ys, - struct ethtool_channels_get_req_dump *req); - -/* ETHTOOL_MSG_CHANNELS_GET - notify */ -struct ethtool_channels_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_channels_get_ntf *ntf); - struct ethtool_channels_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */ -/* ETHTOOL_MSG_CHANNELS_SET - do */ -struct ethtool_channels_set_req { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 tx_max:1; - __u32 other_max:1; - __u32 combined_max:1; - __u32 rx_count:1; - __u32 tx_count:1; - __u32 other_count:1; - __u32 combined_count:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 tx_max; - __u32 other_max; - __u32 combined_max; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -static inline struct ethtool_channels_set_req * -ethtool_channels_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_set_req)); -} -void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req); - -static inline void -ethtool_channels_set_req_set_header_dev_index(struct ethtool_channels_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_set_req_set_header_dev_name(struct ethtool_channels_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_set_req_set_header_flags(struct ethtool_channels_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_channels_set_req_set_rx_max(struct ethtool_channels_set_req *req, - __u32 rx_max) -{ - req->_present.rx_max = 1; - req->rx_max = rx_max; -} -static inline void -ethtool_channels_set_req_set_tx_max(struct ethtool_channels_set_req *req, - __u32 tx_max) -{ - req->_present.tx_max = 1; - req->tx_max = tx_max; -} -static inline void -ethtool_channels_set_req_set_other_max(struct ethtool_channels_set_req *req, - __u32 other_max) -{ - req->_present.other_max = 1; - req->other_max = other_max; -} -static inline void -ethtool_channels_set_req_set_combined_max(struct ethtool_channels_set_req *req, - __u32 combined_max) -{ - req->_present.combined_max = 1; - req->combined_max = combined_max; -} -static inline void -ethtool_channels_set_req_set_rx_count(struct ethtool_channels_set_req *req, - __u32 rx_count) -{ - req->_present.rx_count = 1; - req->rx_count = rx_count; -} -static inline void -ethtool_channels_set_req_set_tx_count(struct ethtool_channels_set_req *req, - __u32 tx_count) -{ - req->_present.tx_count = 1; - req->tx_count = tx_count; -} -static inline void -ethtool_channels_set_req_set_other_count(struct ethtool_channels_set_req *req, - __u32 other_count) -{ - req->_present.other_count = 1; - req->other_count = other_count; -} -static inline void -ethtool_channels_set_req_set_combined_count(struct ethtool_channels_set_req *req, - __u32 combined_count) -{ - req->_present.combined_count = 1; - req->combined_count = combined_count; -} - -/* - * Set channel params. - */ -int ethtool_channels_set(struct ynl_sock *ys, - struct ethtool_channels_set_req *req); - -/* ============== ETHTOOL_MSG_COALESCE_GET ============== */ -/* ETHTOOL_MSG_COALESCE_GET - do */ -struct ethtool_coalesce_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_coalesce_get_req * -ethtool_coalesce_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_get_req)); -} -void ethtool_coalesce_get_req_free(struct ethtool_coalesce_get_req *req); - -static inline void -ethtool_coalesce_get_req_set_header_dev_index(struct ethtool_coalesce_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_get_req_set_header_dev_name(struct ethtool_coalesce_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_get_req_set_header_flags(struct ethtool_coalesce_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_coalesce_get_rsp { - struct { - __u32 header:1; - __u32 rx_usecs:1; - __u32 rx_max_frames:1; - __u32 rx_usecs_irq:1; - __u32 rx_max_frames_irq:1; - __u32 tx_usecs:1; - __u32 tx_max_frames:1; - __u32 tx_usecs_irq:1; - __u32 tx_max_frames_irq:1; - __u32 stats_block_usecs:1; - __u32 use_adaptive_rx:1; - __u32 use_adaptive_tx:1; - __u32 pkt_rate_low:1; - __u32 rx_usecs_low:1; - __u32 rx_max_frames_low:1; - __u32 tx_usecs_low:1; - __u32 tx_max_frames_low:1; - __u32 pkt_rate_high:1; - __u32 rx_usecs_high:1; - __u32 rx_max_frames_high:1; - __u32 tx_usecs_high:1; - __u32 tx_max_frames_high:1; - __u32 rate_sample_interval:1; - __u32 use_cqe_mode_tx:1; - __u32 use_cqe_mode_rx:1; - __u32 tx_aggr_max_bytes:1; - __u32 tx_aggr_max_frames:1; - __u32 tx_aggr_time_usecs:1; - } _present; - - struct ethtool_header header; - __u32 rx_usecs; - __u32 rx_max_frames; - __u32 rx_usecs_irq; - __u32 rx_max_frames_irq; - __u32 tx_usecs; - __u32 tx_max_frames; - __u32 tx_usecs_irq; - __u32 tx_max_frames_irq; - __u32 stats_block_usecs; - __u8 use_adaptive_rx; - __u8 use_adaptive_tx; - __u32 pkt_rate_low; - __u32 rx_usecs_low; - __u32 rx_max_frames_low; - __u32 tx_usecs_low; - __u32 tx_max_frames_low; - __u32 pkt_rate_high; - __u32 rx_usecs_high; - __u32 rx_max_frames_high; - __u32 tx_usecs_high; - __u32 tx_max_frames_high; - __u32 rate_sample_interval; - __u8 use_cqe_mode_tx; - __u8 use_cqe_mode_rx; - __u32 tx_aggr_max_bytes; - __u32 tx_aggr_max_frames; - __u32 tx_aggr_time_usecs; -}; - -void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp); - -/* - * Get coalesce params. - */ -struct ethtool_coalesce_get_rsp * -ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req); - -/* ETHTOOL_MSG_COALESCE_GET - dump */ -struct ethtool_coalesce_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_coalesce_get_req_dump * -ethtool_coalesce_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_get_req_dump)); -} -void -ethtool_coalesce_get_req_dump_free(struct ethtool_coalesce_get_req_dump *req); - -static inline void -ethtool_coalesce_get_req_dump_set_header_dev_index(struct ethtool_coalesce_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_get_req_dump_set_header_dev_name(struct ethtool_coalesce_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_get_req_dump_set_header_flags(struct ethtool_coalesce_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_coalesce_get_list { - struct ethtool_coalesce_get_list *next; - struct ethtool_coalesce_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp); - -struct ethtool_coalesce_get_list * -ethtool_coalesce_get_dump(struct ynl_sock *ys, - struct ethtool_coalesce_get_req_dump *req); - -/* ETHTOOL_MSG_COALESCE_GET - notify */ -struct ethtool_coalesce_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_coalesce_get_ntf *ntf); - struct ethtool_coalesce_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_COALESCE_SET ============== */ -/* ETHTOOL_MSG_COALESCE_SET - do */ -struct ethtool_coalesce_set_req { - struct { - __u32 header:1; - __u32 rx_usecs:1; - __u32 rx_max_frames:1; - __u32 rx_usecs_irq:1; - __u32 rx_max_frames_irq:1; - __u32 tx_usecs:1; - __u32 tx_max_frames:1; - __u32 tx_usecs_irq:1; - __u32 tx_max_frames_irq:1; - __u32 stats_block_usecs:1; - __u32 use_adaptive_rx:1; - __u32 use_adaptive_tx:1; - __u32 pkt_rate_low:1; - __u32 rx_usecs_low:1; - __u32 rx_max_frames_low:1; - __u32 tx_usecs_low:1; - __u32 tx_max_frames_low:1; - __u32 pkt_rate_high:1; - __u32 rx_usecs_high:1; - __u32 rx_max_frames_high:1; - __u32 tx_usecs_high:1; - __u32 tx_max_frames_high:1; - __u32 rate_sample_interval:1; - __u32 use_cqe_mode_tx:1; - __u32 use_cqe_mode_rx:1; - __u32 tx_aggr_max_bytes:1; - __u32 tx_aggr_max_frames:1; - __u32 tx_aggr_time_usecs:1; - } _present; - - struct ethtool_header header; - __u32 rx_usecs; - __u32 rx_max_frames; - __u32 rx_usecs_irq; - __u32 rx_max_frames_irq; - __u32 tx_usecs; - __u32 tx_max_frames; - __u32 tx_usecs_irq; - __u32 tx_max_frames_irq; - __u32 stats_block_usecs; - __u8 use_adaptive_rx; - __u8 use_adaptive_tx; - __u32 pkt_rate_low; - __u32 rx_usecs_low; - __u32 rx_max_frames_low; - __u32 tx_usecs_low; - __u32 tx_max_frames_low; - __u32 pkt_rate_high; - __u32 rx_usecs_high; - __u32 rx_max_frames_high; - __u32 tx_usecs_high; - __u32 tx_max_frames_high; - __u32 rate_sample_interval; - __u8 use_cqe_mode_tx; - __u8 use_cqe_mode_rx; - __u32 tx_aggr_max_bytes; - __u32 tx_aggr_max_frames; - __u32 tx_aggr_time_usecs; -}; - -static inline struct ethtool_coalesce_set_req * -ethtool_coalesce_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_set_req)); -} -void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req); - -static inline void -ethtool_coalesce_set_req_set_header_dev_index(struct ethtool_coalesce_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_set_req_set_header_dev_name(struct ethtool_coalesce_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_set_req_set_header_flags(struct ethtool_coalesce_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs) -{ - req->_present.rx_usecs = 1; - req->rx_usecs = rx_usecs; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames) -{ - req->_present.rx_max_frames = 1; - req->rx_max_frames = rx_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_irq(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_irq) -{ - req->_present.rx_usecs_irq = 1; - req->rx_usecs_irq = rx_usecs_irq; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_irq(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_irq) -{ - req->_present.rx_max_frames_irq = 1; - req->rx_max_frames_irq = rx_max_frames_irq; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs) -{ - req->_present.tx_usecs = 1; - req->tx_usecs = tx_usecs; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames) -{ - req->_present.tx_max_frames = 1; - req->tx_max_frames = tx_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_irq(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_irq) -{ - req->_present.tx_usecs_irq = 1; - req->tx_usecs_irq = tx_usecs_irq; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_irq(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_irq) -{ - req->_present.tx_max_frames_irq = 1; - req->tx_max_frames_irq = tx_max_frames_irq; -} -static inline void -ethtool_coalesce_set_req_set_stats_block_usecs(struct ethtool_coalesce_set_req *req, - __u32 stats_block_usecs) -{ - req->_present.stats_block_usecs = 1; - req->stats_block_usecs = stats_block_usecs; -} -static inline void -ethtool_coalesce_set_req_set_use_adaptive_rx(struct ethtool_coalesce_set_req *req, - __u8 use_adaptive_rx) -{ - req->_present.use_adaptive_rx = 1; - req->use_adaptive_rx = use_adaptive_rx; -} -static inline void -ethtool_coalesce_set_req_set_use_adaptive_tx(struct ethtool_coalesce_set_req *req, - __u8 use_adaptive_tx) -{ - req->_present.use_adaptive_tx = 1; - req->use_adaptive_tx = use_adaptive_tx; -} -static inline void -ethtool_coalesce_set_req_set_pkt_rate_low(struct ethtool_coalesce_set_req *req, - __u32 pkt_rate_low) -{ - req->_present.pkt_rate_low = 1; - req->pkt_rate_low = pkt_rate_low; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_low(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_low) -{ - req->_present.rx_usecs_low = 1; - req->rx_usecs_low = rx_usecs_low; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_low(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_low) -{ - req->_present.rx_max_frames_low = 1; - req->rx_max_frames_low = rx_max_frames_low; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_low(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_low) -{ - req->_present.tx_usecs_low = 1; - req->tx_usecs_low = tx_usecs_low; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_low(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_low) -{ - req->_present.tx_max_frames_low = 1; - req->tx_max_frames_low = tx_max_frames_low; -} -static inline void -ethtool_coalesce_set_req_set_pkt_rate_high(struct ethtool_coalesce_set_req *req, - __u32 pkt_rate_high) -{ - req->_present.pkt_rate_high = 1; - req->pkt_rate_high = pkt_rate_high; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_high(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_high) -{ - req->_present.rx_usecs_high = 1; - req->rx_usecs_high = rx_usecs_high; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_high(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_high) -{ - req->_present.rx_max_frames_high = 1; - req->rx_max_frames_high = rx_max_frames_high; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_high(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_high) -{ - req->_present.tx_usecs_high = 1; - req->tx_usecs_high = tx_usecs_high; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_high(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_high) -{ - req->_present.tx_max_frames_high = 1; - req->tx_max_frames_high = tx_max_frames_high; -} -static inline void -ethtool_coalesce_set_req_set_rate_sample_interval(struct ethtool_coalesce_set_req *req, - __u32 rate_sample_interval) -{ - req->_present.rate_sample_interval = 1; - req->rate_sample_interval = rate_sample_interval; -} -static inline void -ethtool_coalesce_set_req_set_use_cqe_mode_tx(struct ethtool_coalesce_set_req *req, - __u8 use_cqe_mode_tx) -{ - req->_present.use_cqe_mode_tx = 1; - req->use_cqe_mode_tx = use_cqe_mode_tx; -} -static inline void -ethtool_coalesce_set_req_set_use_cqe_mode_rx(struct ethtool_coalesce_set_req *req, - __u8 use_cqe_mode_rx) -{ - req->_present.use_cqe_mode_rx = 1; - req->use_cqe_mode_rx = use_cqe_mode_rx; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_max_bytes(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_max_bytes) -{ - req->_present.tx_aggr_max_bytes = 1; - req->tx_aggr_max_bytes = tx_aggr_max_bytes; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_max_frames(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_max_frames) -{ - req->_present.tx_aggr_max_frames = 1; - req->tx_aggr_max_frames = tx_aggr_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_time_usecs(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_time_usecs) -{ - req->_present.tx_aggr_time_usecs = 1; - req->tx_aggr_time_usecs = tx_aggr_time_usecs; -} - -/* - * Set coalesce params. - */ -int ethtool_coalesce_set(struct ynl_sock *ys, - struct ethtool_coalesce_set_req *req); - -/* ============== ETHTOOL_MSG_PAUSE_GET ============== */ -/* ETHTOOL_MSG_PAUSE_GET - do */ -struct ethtool_pause_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pause_get_req *ethtool_pause_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_get_req)); -} -void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req); - -static inline void -ethtool_pause_get_req_set_header_dev_index(struct ethtool_pause_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_get_req_set_header_dev_name(struct ethtool_pause_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_get_req_set_header_flags(struct ethtool_pause_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pause_get_rsp { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 rx:1; - __u32 tx:1; - __u32 stats:1; - __u32 stats_src:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - __u8 rx; - __u8 tx; - struct ethtool_pause_stat stats; - __u32 stats_src; -}; - -void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp); - -/* - * Get pause params. - */ -struct ethtool_pause_get_rsp * -ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req); - -/* ETHTOOL_MSG_PAUSE_GET - dump */ -struct ethtool_pause_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pause_get_req_dump * -ethtool_pause_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_get_req_dump)); -} -void ethtool_pause_get_req_dump_free(struct ethtool_pause_get_req_dump *req); - -static inline void -ethtool_pause_get_req_dump_set_header_dev_index(struct ethtool_pause_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_get_req_dump_set_header_dev_name(struct ethtool_pause_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_get_req_dump_set_header_flags(struct ethtool_pause_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pause_get_list { - struct ethtool_pause_get_list *next; - struct ethtool_pause_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp); - -struct ethtool_pause_get_list * -ethtool_pause_get_dump(struct ynl_sock *ys, - struct ethtool_pause_get_req_dump *req); - -/* ETHTOOL_MSG_PAUSE_GET - notify */ -struct ethtool_pause_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_pause_get_ntf *ntf); - struct ethtool_pause_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_PAUSE_SET ============== */ -/* ETHTOOL_MSG_PAUSE_SET - do */ -struct ethtool_pause_set_req { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 rx:1; - __u32 tx:1; - __u32 stats:1; - __u32 stats_src:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - __u8 rx; - __u8 tx; - struct ethtool_pause_stat stats; - __u32 stats_src; -}; - -static inline struct ethtool_pause_set_req *ethtool_pause_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_set_req)); -} -void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req); - -static inline void -ethtool_pause_set_req_set_header_dev_index(struct ethtool_pause_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_set_req_set_header_dev_name(struct ethtool_pause_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_set_req_set_header_flags(struct ethtool_pause_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_pause_set_req_set_autoneg(struct ethtool_pause_set_req *req, - __u8 autoneg) -{ - req->_present.autoneg = 1; - req->autoneg = autoneg; -} -static inline void -ethtool_pause_set_req_set_rx(struct ethtool_pause_set_req *req, __u8 rx) -{ - req->_present.rx = 1; - req->rx = rx; -} -static inline void -ethtool_pause_set_req_set_tx(struct ethtool_pause_set_req *req, __u8 tx) -{ - req->_present.tx = 1; - req->tx = tx; -} -static inline void -ethtool_pause_set_req_set_stats_tx_frames(struct ethtool_pause_set_req *req, - __u64 tx_frames) -{ - req->_present.stats = 1; - req->stats._present.tx_frames = 1; - req->stats.tx_frames = tx_frames; -} -static inline void -ethtool_pause_set_req_set_stats_rx_frames(struct ethtool_pause_set_req *req, - __u64 rx_frames) -{ - req->_present.stats = 1; - req->stats._present.rx_frames = 1; - req->stats.rx_frames = rx_frames; -} -static inline void -ethtool_pause_set_req_set_stats_src(struct ethtool_pause_set_req *req, - __u32 stats_src) -{ - req->_present.stats_src = 1; - req->stats_src = stats_src; -} - -/* - * Set pause params. - */ -int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req); - -/* ============== ETHTOOL_MSG_EEE_GET ============== */ -/* ETHTOOL_MSG_EEE_GET - do */ -struct ethtool_eee_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_eee_get_req *ethtool_eee_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_get_req)); -} -void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req); - -static inline void -ethtool_eee_get_req_set_header_dev_index(struct ethtool_eee_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_get_req_set_header_dev_name(struct ethtool_eee_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_get_req_set_header_flags(struct ethtool_eee_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_eee_get_rsp { - struct { - __u32 header:1; - __u32 modes_ours:1; - __u32 modes_peer:1; - __u32 active:1; - __u32 enabled:1; - __u32 tx_lpi_enabled:1; - __u32 tx_lpi_timer:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes_ours; - struct ethtool_bitset modes_peer; - __u8 active; - __u8 enabled; - __u8 tx_lpi_enabled; - __u32 tx_lpi_timer; -}; - -void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp); - -/* - * Get eee params. - */ -struct ethtool_eee_get_rsp * -ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req); - -/* ETHTOOL_MSG_EEE_GET - dump */ -struct ethtool_eee_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_eee_get_req_dump * -ethtool_eee_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_get_req_dump)); -} -void ethtool_eee_get_req_dump_free(struct ethtool_eee_get_req_dump *req); - -static inline void -ethtool_eee_get_req_dump_set_header_dev_index(struct ethtool_eee_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_get_req_dump_set_header_dev_name(struct ethtool_eee_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_get_req_dump_set_header_flags(struct ethtool_eee_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_eee_get_list { - struct ethtool_eee_get_list *next; - struct ethtool_eee_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp); - -struct ethtool_eee_get_list * -ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req); - -/* ETHTOOL_MSG_EEE_GET - notify */ -struct ethtool_eee_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_eee_get_ntf *ntf); - struct ethtool_eee_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_EEE_SET ============== */ -/* ETHTOOL_MSG_EEE_SET - do */ -struct ethtool_eee_set_req { - struct { - __u32 header:1; - __u32 modes_ours:1; - __u32 modes_peer:1; - __u32 active:1; - __u32 enabled:1; - __u32 tx_lpi_enabled:1; - __u32 tx_lpi_timer:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes_ours; - struct ethtool_bitset modes_peer; - __u8 active; - __u8 enabled; - __u8 tx_lpi_enabled; - __u32 tx_lpi_timer; -}; - -static inline struct ethtool_eee_set_req *ethtool_eee_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_set_req)); -} -void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req); - -static inline void -ethtool_eee_set_req_set_header_dev_index(struct ethtool_eee_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_set_req_set_header_dev_name(struct ethtool_eee_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_set_req_set_header_flags(struct ethtool_eee_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_eee_set_req_set_modes_ours_nomask(struct ethtool_eee_set_req *req) -{ - req->_present.modes_ours = 1; - req->modes_ours._present.nomask = 1; -} -static inline void -ethtool_eee_set_req_set_modes_ours_size(struct ethtool_eee_set_req *req, - __u32 size) -{ - req->_present.modes_ours = 1; - req->modes_ours._present.size = 1; - req->modes_ours.size = size; -} -static inline void -__ethtool_eee_set_req_set_modes_ours_bits_bit(struct ethtool_eee_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes_ours.bits.bit); - req->modes_ours.bits.bit = bit; - req->modes_ours.bits.n_bit = n_bit; -} -static inline void -ethtool_eee_set_req_set_modes_peer_nomask(struct ethtool_eee_set_req *req) -{ - req->_present.modes_peer = 1; - req->modes_peer._present.nomask = 1; -} -static inline void -ethtool_eee_set_req_set_modes_peer_size(struct ethtool_eee_set_req *req, - __u32 size) -{ - req->_present.modes_peer = 1; - req->modes_peer._present.size = 1; - req->modes_peer.size = size; -} -static inline void -__ethtool_eee_set_req_set_modes_peer_bits_bit(struct ethtool_eee_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes_peer.bits.bit); - req->modes_peer.bits.bit = bit; - req->modes_peer.bits.n_bit = n_bit; -} -static inline void -ethtool_eee_set_req_set_active(struct ethtool_eee_set_req *req, __u8 active) -{ - req->_present.active = 1; - req->active = active; -} -static inline void -ethtool_eee_set_req_set_enabled(struct ethtool_eee_set_req *req, __u8 enabled) -{ - req->_present.enabled = 1; - req->enabled = enabled; -} -static inline void -ethtool_eee_set_req_set_tx_lpi_enabled(struct ethtool_eee_set_req *req, - __u8 tx_lpi_enabled) -{ - req->_present.tx_lpi_enabled = 1; - req->tx_lpi_enabled = tx_lpi_enabled; -} -static inline void -ethtool_eee_set_req_set_tx_lpi_timer(struct ethtool_eee_set_req *req, - __u32 tx_lpi_timer) -{ - req->_present.tx_lpi_timer = 1; - req->tx_lpi_timer = tx_lpi_timer; -} - -/* - * Set eee params. - */ -int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req); - -/* ============== ETHTOOL_MSG_TSINFO_GET ============== */ -/* ETHTOOL_MSG_TSINFO_GET - do */ -struct ethtool_tsinfo_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tsinfo_get_req *ethtool_tsinfo_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tsinfo_get_req)); -} -void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req); - -static inline void -ethtool_tsinfo_get_req_set_header_dev_index(struct ethtool_tsinfo_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tsinfo_get_req_set_header_dev_name(struct ethtool_tsinfo_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tsinfo_get_req_set_header_flags(struct ethtool_tsinfo_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tsinfo_get_rsp { - struct { - __u32 header:1; - __u32 timestamping:1; - __u32 tx_types:1; - __u32 rx_filters:1; - __u32 phc_index:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset timestamping; - struct ethtool_bitset tx_types; - struct ethtool_bitset rx_filters; - __u32 phc_index; -}; - -void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp); - -/* - * Get tsinfo params. - */ -struct ethtool_tsinfo_get_rsp * -ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req); - -/* ETHTOOL_MSG_TSINFO_GET - dump */ -struct ethtool_tsinfo_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tsinfo_get_req_dump * -ethtool_tsinfo_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tsinfo_get_req_dump)); -} -void ethtool_tsinfo_get_req_dump_free(struct ethtool_tsinfo_get_req_dump *req); - -static inline void -ethtool_tsinfo_get_req_dump_set_header_dev_index(struct ethtool_tsinfo_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tsinfo_get_req_dump_set_header_dev_name(struct ethtool_tsinfo_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tsinfo_get_req_dump_set_header_flags(struct ethtool_tsinfo_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tsinfo_get_list { - struct ethtool_tsinfo_get_list *next; - struct ethtool_tsinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp); - -struct ethtool_tsinfo_get_list * -ethtool_tsinfo_get_dump(struct ynl_sock *ys, - struct ethtool_tsinfo_get_req_dump *req); - -/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_ACT - do */ -struct ethtool_cable_test_act_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_cable_test_act_req * -ethtool_cable_test_act_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_cable_test_act_req)); -} -void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req); - -static inline void -ethtool_cable_test_act_req_set_header_dev_index(struct ethtool_cable_test_act_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_cable_test_act_req_set_header_dev_name(struct ethtool_cable_test_act_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_cable_test_act_req_set_header_flags(struct ethtool_cable_test_act_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -/* - * Cable test. - */ -int ethtool_cable_test_act(struct ynl_sock *ys, - struct ethtool_cable_test_act_req *req); - -/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */ -struct ethtool_cable_test_tdr_act_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_cable_test_tdr_act_req * -ethtool_cable_test_tdr_act_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_cable_test_tdr_act_req)); -} -void -ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req); - -static inline void -ethtool_cable_test_tdr_act_req_set_header_dev_index(struct ethtool_cable_test_tdr_act_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_cable_test_tdr_act_req_set_header_dev_name(struct ethtool_cable_test_tdr_act_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_cable_test_tdr_act_req_set_header_flags(struct ethtool_cable_test_tdr_act_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -/* - * Cable test TDR. - */ -int ethtool_cable_test_tdr_act(struct ynl_sock *ys, - struct ethtool_cable_test_tdr_act_req *req); - -/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */ -/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */ -struct ethtool_tunnel_info_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tunnel_info_get_req * -ethtool_tunnel_info_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tunnel_info_get_req)); -} -void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req); - -static inline void -ethtool_tunnel_info_get_req_set_header_dev_index(struct ethtool_tunnel_info_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tunnel_info_get_req_set_header_dev_name(struct ethtool_tunnel_info_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tunnel_info_get_req_set_header_flags(struct ethtool_tunnel_info_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tunnel_info_get_rsp { - struct { - __u32 header:1; - __u32 udp_ports:1; - } _present; - - struct ethtool_header header; - struct ethtool_tunnel_udp udp_ports; -}; - -void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp); - -/* - * Get tsinfo params. - */ -struct ethtool_tunnel_info_get_rsp * -ethtool_tunnel_info_get(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req *req); - -/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */ -struct ethtool_tunnel_info_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tunnel_info_get_req_dump * -ethtool_tunnel_info_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tunnel_info_get_req_dump)); -} -void -ethtool_tunnel_info_get_req_dump_free(struct ethtool_tunnel_info_get_req_dump *req); - -static inline void -ethtool_tunnel_info_get_req_dump_set_header_dev_index(struct ethtool_tunnel_info_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tunnel_info_get_req_dump_set_header_dev_name(struct ethtool_tunnel_info_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tunnel_info_get_req_dump_set_header_flags(struct ethtool_tunnel_info_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tunnel_info_get_list { - struct ethtool_tunnel_info_get_list *next; - struct ethtool_tunnel_info_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp); - -struct ethtool_tunnel_info_get_list * -ethtool_tunnel_info_get_dump(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req_dump *req); - -/* ============== ETHTOOL_MSG_FEC_GET ============== */ -/* ETHTOOL_MSG_FEC_GET - do */ -struct ethtool_fec_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_fec_get_req *ethtool_fec_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_get_req)); -} -void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req); - -static inline void -ethtool_fec_get_req_set_header_dev_index(struct ethtool_fec_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_get_req_set_header_dev_name(struct ethtool_fec_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_get_req_set_header_flags(struct ethtool_fec_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_fec_get_rsp { - struct { - __u32 header:1; - __u32 modes:1; - __u32 auto_:1; - __u32 active:1; - __u32 stats:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - __u8 auto_; - __u32 active; - struct ethtool_fec_stat stats; -}; - -void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp); - -/* - * Get FEC params. - */ -struct ethtool_fec_get_rsp * -ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req); - -/* ETHTOOL_MSG_FEC_GET - dump */ -struct ethtool_fec_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_fec_get_req_dump * -ethtool_fec_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_get_req_dump)); -} -void ethtool_fec_get_req_dump_free(struct ethtool_fec_get_req_dump *req); - -static inline void -ethtool_fec_get_req_dump_set_header_dev_index(struct ethtool_fec_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_get_req_dump_set_header_dev_name(struct ethtool_fec_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_get_req_dump_set_header_flags(struct ethtool_fec_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_fec_get_list { - struct ethtool_fec_get_list *next; - struct ethtool_fec_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp); - -struct ethtool_fec_get_list * -ethtool_fec_get_dump(struct ynl_sock *ys, struct ethtool_fec_get_req_dump *req); - -/* ETHTOOL_MSG_FEC_GET - notify */ -struct ethtool_fec_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_fec_get_ntf *ntf); - struct ethtool_fec_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_FEC_SET ============== */ -/* ETHTOOL_MSG_FEC_SET - do */ -struct ethtool_fec_set_req { - struct { - __u32 header:1; - __u32 modes:1; - __u32 auto_:1; - __u32 active:1; - __u32 stats:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - __u8 auto_; - __u32 active; - struct ethtool_fec_stat stats; -}; - -static inline struct ethtool_fec_set_req *ethtool_fec_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_set_req)); -} -void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req); - -static inline void -ethtool_fec_set_req_set_header_dev_index(struct ethtool_fec_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_set_req_set_header_dev_name(struct ethtool_fec_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_set_req_set_header_flags(struct ethtool_fec_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_fec_set_req_set_modes_nomask(struct ethtool_fec_set_req *req) -{ - req->_present.modes = 1; - req->modes._present.nomask = 1; -} -static inline void -ethtool_fec_set_req_set_modes_size(struct ethtool_fec_set_req *req, __u32 size) -{ - req->_present.modes = 1; - req->modes._present.size = 1; - req->modes.size = size; -} -static inline void -__ethtool_fec_set_req_set_modes_bits_bit(struct ethtool_fec_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes.bits.bit); - req->modes.bits.bit = bit; - req->modes.bits.n_bit = n_bit; -} -static inline void -ethtool_fec_set_req_set_auto_(struct ethtool_fec_set_req *req, __u8 auto_) -{ - req->_present.auto_ = 1; - req->auto_ = auto_; -} -static inline void -ethtool_fec_set_req_set_active(struct ethtool_fec_set_req *req, __u32 active) -{ - req->_present.active = 1; - req->active = active; -} -static inline void -ethtool_fec_set_req_set_stats_corrected(struct ethtool_fec_set_req *req, - const void *corrected, size_t len) -{ - free(req->stats.corrected); - req->stats._present.corrected_len = len; - req->stats.corrected = malloc(req->stats._present.corrected_len); - memcpy(req->stats.corrected, corrected, req->stats._present.corrected_len); -} -static inline void -ethtool_fec_set_req_set_stats_uncorr(struct ethtool_fec_set_req *req, - const void *uncorr, size_t len) -{ - free(req->stats.uncorr); - req->stats._present.uncorr_len = len; - req->stats.uncorr = malloc(req->stats._present.uncorr_len); - memcpy(req->stats.uncorr, uncorr, req->stats._present.uncorr_len); -} -static inline void -ethtool_fec_set_req_set_stats_corr_bits(struct ethtool_fec_set_req *req, - const void *corr_bits, size_t len) -{ - free(req->stats.corr_bits); - req->stats._present.corr_bits_len = len; - req->stats.corr_bits = malloc(req->stats._present.corr_bits_len); - memcpy(req->stats.corr_bits, corr_bits, req->stats._present.corr_bits_len); -} - -/* - * Set FEC params. - */ -int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req); - -/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */ -/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */ -struct ethtool_module_eeprom_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_eeprom_get_req * -ethtool_module_eeprom_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_eeprom_get_req)); -} -void -ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req); - -static inline void -ethtool_module_eeprom_get_req_set_header_dev_index(struct ethtool_module_eeprom_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_eeprom_get_req_set_header_dev_name(struct ethtool_module_eeprom_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_eeprom_get_req_set_header_flags(struct ethtool_module_eeprom_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_eeprom_get_rsp { - struct { - __u32 header:1; - __u32 offset:1; - __u32 length:1; - __u32 page:1; - __u32 bank:1; - __u32 i2c_address:1; - __u32 data_len; - } _present; - - struct ethtool_header header; - __u32 offset; - __u32 length; - __u8 page; - __u8 bank; - __u8 i2c_address; - void *data; -}; - -void -ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp); - -/* - * Get module EEPROM params. - */ -struct ethtool_module_eeprom_get_rsp * -ethtool_module_eeprom_get(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req *req); - -/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */ -struct ethtool_module_eeprom_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_eeprom_get_req_dump * -ethtool_module_eeprom_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_eeprom_get_req_dump)); -} -void -ethtool_module_eeprom_get_req_dump_free(struct ethtool_module_eeprom_get_req_dump *req); - -static inline void -ethtool_module_eeprom_get_req_dump_set_header_dev_index(struct ethtool_module_eeprom_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_eeprom_get_req_dump_set_header_dev_name(struct ethtool_module_eeprom_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_eeprom_get_req_dump_set_header_flags(struct ethtool_module_eeprom_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_eeprom_get_list { - struct ethtool_module_eeprom_get_list *next; - struct ethtool_module_eeprom_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp); - -struct ethtool_module_eeprom_get_list * -ethtool_module_eeprom_get_dump(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */ -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */ -struct ethtool_phc_vclocks_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_phc_vclocks_get_req * -ethtool_phc_vclocks_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req)); -} -void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req); - -static inline void -ethtool_phc_vclocks_get_req_set_header_dev_index(struct ethtool_phc_vclocks_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_phc_vclocks_get_req_set_header_dev_name(struct ethtool_phc_vclocks_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_phc_vclocks_get_req_set_header_flags(struct ethtool_phc_vclocks_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_phc_vclocks_get_rsp { - struct { - __u32 header:1; - __u32 num:1; - } _present; - - struct ethtool_header header; - __u32 num; -}; - -void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp); - -/* - * Get PHC VCLOCKs. - */ -struct ethtool_phc_vclocks_get_rsp * -ethtool_phc_vclocks_get(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req *req); - -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */ -struct ethtool_phc_vclocks_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_phc_vclocks_get_req_dump * -ethtool_phc_vclocks_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req_dump)); -} -void -ethtool_phc_vclocks_get_req_dump_free(struct ethtool_phc_vclocks_get_req_dump *req); - -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_dev_index(struct ethtool_phc_vclocks_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_dev_name(struct ethtool_phc_vclocks_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_flags(struct ethtool_phc_vclocks_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_phc_vclocks_get_list { - struct ethtool_phc_vclocks_get_list *next; - struct ethtool_phc_vclocks_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp); - -struct ethtool_phc_vclocks_get_list * -ethtool_phc_vclocks_get_dump(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req_dump *req); - -/* ============== ETHTOOL_MSG_MODULE_GET ============== */ -/* ETHTOOL_MSG_MODULE_GET - do */ -struct ethtool_module_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_get_req *ethtool_module_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_get_req)); -} -void ethtool_module_get_req_free(struct ethtool_module_get_req *req); - -static inline void -ethtool_module_get_req_set_header_dev_index(struct ethtool_module_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_get_req_set_header_dev_name(struct ethtool_module_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_get_req_set_header_flags(struct ethtool_module_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_get_rsp { - struct { - __u32 header:1; - __u32 power_mode_policy:1; - __u32 power_mode:1; - } _present; - - struct ethtool_header header; - __u8 power_mode_policy; - __u8 power_mode; -}; - -void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp); - -/* - * Get module params. - */ -struct ethtool_module_get_rsp * -ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req); - -/* ETHTOOL_MSG_MODULE_GET - dump */ -struct ethtool_module_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_get_req_dump * -ethtool_module_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_get_req_dump)); -} -void ethtool_module_get_req_dump_free(struct ethtool_module_get_req_dump *req); - -static inline void -ethtool_module_get_req_dump_set_header_dev_index(struct ethtool_module_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_get_req_dump_set_header_dev_name(struct ethtool_module_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_get_req_dump_set_header_flags(struct ethtool_module_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_get_list { - struct ethtool_module_get_list *next; - struct ethtool_module_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp); - -struct ethtool_module_get_list * -ethtool_module_get_dump(struct ynl_sock *ys, - struct ethtool_module_get_req_dump *req); - -/* ETHTOOL_MSG_MODULE_GET - notify */ -struct ethtool_module_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_module_get_ntf *ntf); - struct ethtool_module_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_MODULE_SET ============== */ -/* ETHTOOL_MSG_MODULE_SET - do */ -struct ethtool_module_set_req { - struct { - __u32 header:1; - __u32 power_mode_policy:1; - __u32 power_mode:1; - } _present; - - struct ethtool_header header; - __u8 power_mode_policy; - __u8 power_mode; -}; - -static inline struct ethtool_module_set_req *ethtool_module_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_set_req)); -} -void ethtool_module_set_req_free(struct ethtool_module_set_req *req); - -static inline void -ethtool_module_set_req_set_header_dev_index(struct ethtool_module_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_set_req_set_header_dev_name(struct ethtool_module_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_set_req_set_header_flags(struct ethtool_module_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_module_set_req_set_power_mode_policy(struct ethtool_module_set_req *req, - __u8 power_mode_policy) -{ - req->_present.power_mode_policy = 1; - req->power_mode_policy = power_mode_policy; -} -static inline void -ethtool_module_set_req_set_power_mode(struct ethtool_module_set_req *req, - __u8 power_mode) -{ - req->_present.power_mode = 1; - req->power_mode = power_mode; -} - -/* - * Set module params. - */ -int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req); - -/* ============== ETHTOOL_MSG_PSE_GET ============== */ -/* ETHTOOL_MSG_PSE_GET - do */ -struct ethtool_pse_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pse_get_req *ethtool_pse_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_get_req)); -} -void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req); - -static inline void -ethtool_pse_get_req_set_header_dev_index(struct ethtool_pse_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_get_req_set_header_dev_name(struct ethtool_pse_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_get_req_set_header_flags(struct ethtool_pse_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pse_get_rsp { - struct { - __u32 header:1; - __u32 admin_state:1; - __u32 admin_control:1; - __u32 pw_d_status:1; - } _present; - - struct ethtool_header header; - __u32 admin_state; - __u32 admin_control; - __u32 pw_d_status; -}; - -void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp); - -/* - * Get Power Sourcing Equipment params. - */ -struct ethtool_pse_get_rsp * -ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req); - -/* ETHTOOL_MSG_PSE_GET - dump */ -struct ethtool_pse_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pse_get_req_dump * -ethtool_pse_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_get_req_dump)); -} -void ethtool_pse_get_req_dump_free(struct ethtool_pse_get_req_dump *req); - -static inline void -ethtool_pse_get_req_dump_set_header_dev_index(struct ethtool_pse_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_get_req_dump_set_header_dev_name(struct ethtool_pse_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_get_req_dump_set_header_flags(struct ethtool_pse_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pse_get_list { - struct ethtool_pse_get_list *next; - struct ethtool_pse_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp); - -struct ethtool_pse_get_list * -ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PSE_SET ============== */ -/* ETHTOOL_MSG_PSE_SET - do */ -struct ethtool_pse_set_req { - struct { - __u32 header:1; - __u32 admin_state:1; - __u32 admin_control:1; - __u32 pw_d_status:1; - } _present; - - struct ethtool_header header; - __u32 admin_state; - __u32 admin_control; - __u32 pw_d_status; -}; - -static inline struct ethtool_pse_set_req *ethtool_pse_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_set_req)); -} -void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req); - -static inline void -ethtool_pse_set_req_set_header_dev_index(struct ethtool_pse_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_set_req_set_header_dev_name(struct ethtool_pse_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_set_req_set_header_flags(struct ethtool_pse_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_pse_set_req_set_admin_state(struct ethtool_pse_set_req *req, - __u32 admin_state) -{ - req->_present.admin_state = 1; - req->admin_state = admin_state; -} -static inline void -ethtool_pse_set_req_set_admin_control(struct ethtool_pse_set_req *req, - __u32 admin_control) -{ - req->_present.admin_control = 1; - req->admin_control = admin_control; -} -static inline void -ethtool_pse_set_req_set_pw_d_status(struct ethtool_pse_set_req *req, - __u32 pw_d_status) -{ - req->_present.pw_d_status = 1; - req->pw_d_status = pw_d_status; -} - -/* - * Set Power Sourcing Equipment params. - */ -int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req); - -/* ============== ETHTOOL_MSG_RSS_GET ============== */ -/* ETHTOOL_MSG_RSS_GET - do */ -struct ethtool_rss_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rss_get_req *ethtool_rss_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rss_get_req)); -} -void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req); - -static inline void -ethtool_rss_get_req_set_header_dev_index(struct ethtool_rss_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rss_get_req_set_header_dev_name(struct ethtool_rss_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rss_get_req_set_header_flags(struct ethtool_rss_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rss_get_rsp { - struct { - __u32 header:1; - __u32 context:1; - __u32 hfunc:1; - __u32 indir_len; - __u32 hkey_len; - } _present; - - struct ethtool_header header; - __u32 context; - __u32 hfunc; - void *indir; - void *hkey; -}; - -void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp); - -/* - * Get RSS params. - */ -struct ethtool_rss_get_rsp * -ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req); - -/* ETHTOOL_MSG_RSS_GET - dump */ -struct ethtool_rss_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rss_get_req_dump * -ethtool_rss_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rss_get_req_dump)); -} -void ethtool_rss_get_req_dump_free(struct ethtool_rss_get_req_dump *req); - -static inline void -ethtool_rss_get_req_dump_set_header_dev_index(struct ethtool_rss_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rss_get_req_dump_set_header_dev_name(struct ethtool_rss_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rss_get_req_dump_set_header_flags(struct ethtool_rss_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rss_get_list { - struct ethtool_rss_get_list *next; - struct ethtool_rss_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp); - -struct ethtool_rss_get_list * -ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_GET_CFG - do */ -struct ethtool_plca_get_cfg_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_cfg_req * -ethtool_plca_get_cfg_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_cfg_req)); -} -void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req); - -static inline void -ethtool_plca_get_cfg_req_set_header_dev_index(struct ethtool_plca_get_cfg_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_cfg_req_set_header_dev_name(struct ethtool_plca_get_cfg_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_cfg_req_set_header_flags(struct ethtool_plca_get_cfg_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_cfg_rsp { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp); - -/* - * Get PLCA params. - */ -struct ethtool_plca_get_cfg_rsp * -ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req); - -/* ETHTOOL_MSG_PLCA_GET_CFG - dump */ -struct ethtool_plca_get_cfg_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_cfg_req_dump * -ethtool_plca_get_cfg_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_cfg_req_dump)); -} -void -ethtool_plca_get_cfg_req_dump_free(struct ethtool_plca_get_cfg_req_dump *req); - -static inline void -ethtool_plca_get_cfg_req_dump_set_header_dev_index(struct ethtool_plca_get_cfg_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_cfg_req_dump_set_header_dev_name(struct ethtool_plca_get_cfg_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_cfg_req_dump_set_header_flags(struct ethtool_plca_get_cfg_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_cfg_list { - struct ethtool_plca_get_cfg_list *next; - struct ethtool_plca_get_cfg_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp); - -struct ethtool_plca_get_cfg_list * -ethtool_plca_get_cfg_dump(struct ynl_sock *ys, - struct ethtool_plca_get_cfg_req_dump *req); - -/* ETHTOOL_MSG_PLCA_GET_CFG - notify */ -struct ethtool_plca_get_cfg_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_plca_get_cfg_ntf *ntf); - struct ethtool_plca_get_cfg_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp); - -/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_SET_CFG - do */ -struct ethtool_plca_set_cfg_req { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -static inline struct ethtool_plca_set_cfg_req * -ethtool_plca_set_cfg_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_set_cfg_req)); -} -void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req); - -static inline void -ethtool_plca_set_cfg_req_set_header_dev_index(struct ethtool_plca_set_cfg_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_set_cfg_req_set_header_dev_name(struct ethtool_plca_set_cfg_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_set_cfg_req_set_header_flags(struct ethtool_plca_set_cfg_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_plca_set_cfg_req_set_version(struct ethtool_plca_set_cfg_req *req, - __u16 version) -{ - req->_present.version = 1; - req->version = version; -} -static inline void -ethtool_plca_set_cfg_req_set_enabled(struct ethtool_plca_set_cfg_req *req, - __u8 enabled) -{ - req->_present.enabled = 1; - req->enabled = enabled; -} -static inline void -ethtool_plca_set_cfg_req_set_status(struct ethtool_plca_set_cfg_req *req, - __u8 status) -{ - req->_present.status = 1; - req->status = status; -} -static inline void -ethtool_plca_set_cfg_req_set_node_cnt(struct ethtool_plca_set_cfg_req *req, - __u32 node_cnt) -{ - req->_present.node_cnt = 1; - req->node_cnt = node_cnt; -} -static inline void -ethtool_plca_set_cfg_req_set_node_id(struct ethtool_plca_set_cfg_req *req, - __u32 node_id) -{ - req->_present.node_id = 1; - req->node_id = node_id; -} -static inline void -ethtool_plca_set_cfg_req_set_to_tmr(struct ethtool_plca_set_cfg_req *req, - __u32 to_tmr) -{ - req->_present.to_tmr = 1; - req->to_tmr = to_tmr; -} -static inline void -ethtool_plca_set_cfg_req_set_burst_cnt(struct ethtool_plca_set_cfg_req *req, - __u32 burst_cnt) -{ - req->_present.burst_cnt = 1; - req->burst_cnt = burst_cnt; -} -static inline void -ethtool_plca_set_cfg_req_set_burst_tmr(struct ethtool_plca_set_cfg_req *req, - __u32 burst_tmr) -{ - req->_present.burst_tmr = 1; - req->burst_tmr = burst_tmr; -} - -/* - * Set PLCA params. - */ -int ethtool_plca_set_cfg(struct ynl_sock *ys, - struct ethtool_plca_set_cfg_req *req); - -/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */ -/* ETHTOOL_MSG_PLCA_GET_STATUS - do */ -struct ethtool_plca_get_status_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_status_req * -ethtool_plca_get_status_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_status_req)); -} -void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req); - -static inline void -ethtool_plca_get_status_req_set_header_dev_index(struct ethtool_plca_get_status_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_status_req_set_header_dev_name(struct ethtool_plca_get_status_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_status_req_set_header_flags(struct ethtool_plca_get_status_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_status_rsp { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -void ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp); - -/* - * Get PLCA status params. - */ -struct ethtool_plca_get_status_rsp * -ethtool_plca_get_status(struct ynl_sock *ys, - struct ethtool_plca_get_status_req *req); - -/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */ -struct ethtool_plca_get_status_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_status_req_dump * -ethtool_plca_get_status_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_status_req_dump)); -} -void -ethtool_plca_get_status_req_dump_free(struct ethtool_plca_get_status_req_dump *req); - -static inline void -ethtool_plca_get_status_req_dump_set_header_dev_index(struct ethtool_plca_get_status_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_status_req_dump_set_header_dev_name(struct ethtool_plca_get_status_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_status_req_dump_set_header_flags(struct ethtool_plca_get_status_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_status_list { - struct ethtool_plca_get_status_list *next; - struct ethtool_plca_get_status_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp); - -struct ethtool_plca_get_status_list * -ethtool_plca_get_status_dump(struct ynl_sock *ys, - struct ethtool_plca_get_status_req_dump *req); - -/* ============== ETHTOOL_MSG_MM_GET ============== */ -/* ETHTOOL_MSG_MM_GET - do */ -struct ethtool_mm_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_mm_get_req *ethtool_mm_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_get_req)); -} -void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req); - -static inline void -ethtool_mm_get_req_set_header_dev_index(struct ethtool_mm_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_get_req_set_header_dev_name(struct ethtool_mm_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_get_req_set_header_flags(struct ethtool_mm_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_mm_get_rsp { - struct { - __u32 header:1; - __u32 pmac_enabled:1; - __u32 tx_enabled:1; - __u32 tx_active:1; - __u32 tx_min_frag_size:1; - __u32 rx_min_frag_size:1; - __u32 verify_enabled:1; - __u32 verify_time:1; - __u32 max_verify_time:1; - __u32 stats:1; - } _present; - - struct ethtool_header header; - __u8 pmac_enabled; - __u8 tx_enabled; - __u8 tx_active; - __u32 tx_min_frag_size; - __u32 rx_min_frag_size; - __u8 verify_enabled; - __u32 verify_time; - __u32 max_verify_time; - struct ethtool_mm_stat stats; -}; - -void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp); - -/* - * Get MAC Merge configuration and state - */ -struct ethtool_mm_get_rsp * -ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req); - -/* ETHTOOL_MSG_MM_GET - dump */ -struct ethtool_mm_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_mm_get_req_dump * -ethtool_mm_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_get_req_dump)); -} -void ethtool_mm_get_req_dump_free(struct ethtool_mm_get_req_dump *req); - -static inline void -ethtool_mm_get_req_dump_set_header_dev_index(struct ethtool_mm_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_get_req_dump_set_header_dev_name(struct ethtool_mm_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_get_req_dump_set_header_flags(struct ethtool_mm_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_mm_get_list { - struct ethtool_mm_get_list *next; - struct ethtool_mm_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp); - -struct ethtool_mm_get_list * -ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req); - -/* ETHTOOL_MSG_MM_GET - notify */ -struct ethtool_mm_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_mm_get_ntf *ntf); - struct ethtool_mm_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_MM_SET ============== */ -/* ETHTOOL_MSG_MM_SET - do */ -struct ethtool_mm_set_req { - struct { - __u32 header:1; - __u32 verify_enabled:1; - __u32 verify_time:1; - __u32 tx_enabled:1; - __u32 pmac_enabled:1; - __u32 tx_min_frag_size:1; - } _present; - - struct ethtool_header header; - __u8 verify_enabled; - __u32 verify_time; - __u8 tx_enabled; - __u8 pmac_enabled; - __u32 tx_min_frag_size; -}; - -static inline struct ethtool_mm_set_req *ethtool_mm_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_set_req)); -} -void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req); - -static inline void -ethtool_mm_set_req_set_header_dev_index(struct ethtool_mm_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_set_req_set_header_dev_name(struct ethtool_mm_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_set_req_set_header_flags(struct ethtool_mm_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_mm_set_req_set_verify_enabled(struct ethtool_mm_set_req *req, - __u8 verify_enabled) -{ - req->_present.verify_enabled = 1; - req->verify_enabled = verify_enabled; -} -static inline void -ethtool_mm_set_req_set_verify_time(struct ethtool_mm_set_req *req, - __u32 verify_time) -{ - req->_present.verify_time = 1; - req->verify_time = verify_time; -} -static inline void -ethtool_mm_set_req_set_tx_enabled(struct ethtool_mm_set_req *req, - __u8 tx_enabled) -{ - req->_present.tx_enabled = 1; - req->tx_enabled = tx_enabled; -} -static inline void -ethtool_mm_set_req_set_pmac_enabled(struct ethtool_mm_set_req *req, - __u8 pmac_enabled) -{ - req->_present.pmac_enabled = 1; - req->pmac_enabled = pmac_enabled; -} -static inline void -ethtool_mm_set_req_set_tx_min_frag_size(struct ethtool_mm_set_req *req, - __u32 tx_min_frag_size) -{ - req->_present.tx_min_frag_size = 1; - req->tx_min_frag_size = tx_min_frag_size; -} - -/* - * Set MAC Merge configuration - */ -int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req); - -/* ETHTOOL_MSG_CABLE_TEST_NTF - event */ -struct ethtool_cable_test_ntf_rsp { - struct { - __u32 header:1; - __u32 status:1; - } _present; - - struct ethtool_header header; - __u8 status; -}; - -struct ethtool_cable_test_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_cable_test_ntf *ntf); - struct ethtool_cable_test_ntf_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp); - -/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */ -struct ethtool_cable_test_tdr_ntf_rsp { - struct { - __u32 header:1; - __u32 status:1; - __u32 nest:1; - } _present; - - struct ethtool_header header; - __u8 status; - struct ethtool_cable_nest nest; -}; - -struct ethtool_cable_test_tdr_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_cable_test_tdr_ntf *ntf); - struct ethtool_cable_test_tdr_ntf_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp); - -#endif /* _LINUX_ETHTOOL_GEN_H */ diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c deleted file mode 100644 index f30bef23bc31..000000000000 --- a/tools/net/ynl/generated/fou-user.c +++ /dev/null @@ -1,330 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/fou.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "fou-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const fou_op_strmap[] = { - [FOU_CMD_ADD] = "add", - [FOU_CMD_DEL] = "del", - [FOU_CMD_GET] = "get", -}; - -const char *fou_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(fou_op_strmap)) - return NULL; - return fou_op_strmap[op]; -} - -static const char * const fou_encap_type_strmap[] = { - [0] = "unspec", - [1] = "direct", - [2] = "gue", -}; - -const char *fou_encap_type_str(int value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(fou_encap_type_strmap)) - return NULL; - return fou_encap_type_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr fou_policy[FOU_ATTR_MAX + 1] = { - [FOU_ATTR_UNSPEC] = { .name = "unspec", .type = YNL_PT_REJECT, }, - [FOU_ATTR_PORT] = { .name = "port", .type = YNL_PT_U16, }, - [FOU_ATTR_AF] = { .name = "af", .type = YNL_PT_U8, }, - [FOU_ATTR_IPPROTO] = { .name = "ipproto", .type = YNL_PT_U8, }, - [FOU_ATTR_TYPE] = { .name = "type", .type = YNL_PT_U8, }, - [FOU_ATTR_REMCSUM_NOPARTIAL] = { .name = "remcsum_nopartial", .type = YNL_PT_FLAG, }, - [FOU_ATTR_LOCAL_V4] = { .name = "local_v4", .type = YNL_PT_U32, }, - [FOU_ATTR_LOCAL_V6] = { .name = "local_v6", .type = YNL_PT_BINARY,}, - [FOU_ATTR_PEER_V4] = { .name = "peer_v4", .type = YNL_PT_U32, }, - [FOU_ATTR_PEER_V6] = { .name = "peer_v6", .type = YNL_PT_BINARY,}, - [FOU_ATTR_PEER_PORT] = { .name = "peer_port", .type = YNL_PT_U16, }, - [FOU_ATTR_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest fou_nest = { - .max_attr = FOU_ATTR_MAX, - .table = fou_policy, -}; - -/* Common nested types */ -/* ============== FOU_CMD_ADD ============== */ -/* FOU_CMD_ADD - do */ -void fou_add_req_free(struct fou_add_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -int fou_add(struct ynl_sock *ys, struct fou_add_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_ADD, 1); - ys->req_policy = &fou_nest; - - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.ipproto) - mnl_attr_put_u8(nlh, FOU_ATTR_IPPROTO, req->ipproto); - if (req->_present.type) - mnl_attr_put_u8(nlh, FOU_ATTR_TYPE, req->type); - if (req->_present.remcsum_nopartial) - mnl_attr_put(nlh, FOU_ATTR_REMCSUM_NOPARTIAL, 0, NULL); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== FOU_CMD_DEL ============== */ -/* FOU_CMD_DEL - do */ -void fou_del_req_free(struct fou_del_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -int fou_del(struct ynl_sock *ys, struct fou_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_DEL, 1); - ys->req_policy = &fou_nest; - - if (req->_present.af) - mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== FOU_CMD_GET ============== */ -/* FOU_CMD_GET - do */ -void fou_get_req_free(struct fou_get_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -void fou_get_rsp_free(struct fou_get_rsp *rsp) -{ - free(rsp->local_v6); - free(rsp->peer_v6); - free(rsp); -} - -int fou_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct fou_get_rsp *dst; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == FOU_ATTR_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u16(attr); - } else if (type == FOU_ATTR_IPPROTO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ipproto = 1; - dst->ipproto = mnl_attr_get_u8(attr); - } else if (type == FOU_ATTR_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u8(attr); - } else if (type == FOU_ATTR_REMCSUM_NOPARTIAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.remcsum_nopartial = 1; - } else if (type == FOU_ATTR_LOCAL_V4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.local_v4 = 1; - dst->local_v4 = mnl_attr_get_u32(attr); - } else if (type == FOU_ATTR_PEER_V4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer_v4 = 1; - dst->peer_v4 = mnl_attr_get_u32(attr); - } else if (type == FOU_ATTR_LOCAL_V6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.local_v6_len = len; - dst->local_v6 = malloc(len); - memcpy(dst->local_v6, mnl_attr_get_payload(attr), len); - } else if (type == FOU_ATTR_PEER_V6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.peer_v6_len = len; - dst->peer_v6 = malloc(len); - memcpy(dst->peer_v6, mnl_attr_get_payload(attr), len); - } else if (type == FOU_ATTR_PEER_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer_port = 1; - dst->peer_port = mnl_attr_get_u16(attr); - } else if (type == FOU_ATTR_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct fou_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_GET, 1); - ys->req_policy = &fou_nest; - yrs.yarg.rsp_policy = &fou_nest; - - if (req->_present.af) - mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = fou_get_rsp_parse; - yrs.rsp_cmd = FOU_CMD_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - fou_get_rsp_free(rsp); - return NULL; -} - -/* FOU_CMD_GET - dump */ -void fou_get_list_free(struct fou_get_list *rsp) -{ - struct fou_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.local_v6); - free(rsp->obj.peer_v6); - free(rsp); - } -} - -struct fou_get_list *fou_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct fou_get_list); - yds.cb = fou_get_rsp_parse; - yds.rsp_cmd = FOU_CMD_GET; - yds.rsp_policy = &fou_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, FOU_CMD_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - fou_get_list_free(yds.first); - return NULL; -} - -const struct ynl_family ynl_fou_family = { - .name = "fou", -}; diff --git a/tools/net/ynl/generated/fou-user.h b/tools/net/ynl/generated/fou-user.h deleted file mode 100644 index fd566716ddd6..000000000000 --- a/tools/net/ynl/generated/fou-user.h +++ /dev/null @@ -1,343 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/fou.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_FOU_GEN_H -#define _LINUX_FOU_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_fou_family; - -/* Enums */ -const char *fou_op_str(int op); -const char *fou_encap_type_str(int value); - -/* Common nested types */ -/* ============== FOU_CMD_ADD ============== */ -/* FOU_CMD_ADD - do */ -struct fou_add_req { - struct { - __u32 port:1; - __u32 ipproto:1; - __u32 type:1; - __u32 remcsum_nopartial:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - __u32 peer_port:1; - __u32 ifindex:1; - } _present; - - __u16 port /* big-endian */; - __u8 ipproto; - __u8 type; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; - __u16 peer_port /* big-endian */; - __s32 ifindex; -}; - -static inline struct fou_add_req *fou_add_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_add_req)); -} -void fou_add_req_free(struct fou_add_req *req); - -static inline void -fou_add_req_set_port(struct fou_add_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_add_req_set_ipproto(struct fou_add_req *req, __u8 ipproto) -{ - req->_present.ipproto = 1; - req->ipproto = ipproto; -} -static inline void fou_add_req_set_type(struct fou_add_req *req, __u8 type) -{ - req->_present.type = 1; - req->type = type; -} -static inline void fou_add_req_set_remcsum_nopartial(struct fou_add_req *req) -{ - req->_present.remcsum_nopartial = 1; -} -static inline void -fou_add_req_set_local_v4(struct fou_add_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_add_req_set_peer_v4(struct fou_add_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void -fou_add_req_set_local_v6(struct fou_add_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_add_req_set_peer_v6(struct fou_add_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} -static inline void -fou_add_req_set_peer_port(struct fou_add_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_add_req_set_ifindex(struct fou_add_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -/* - * Add port. - */ -int fou_add(struct ynl_sock *ys, struct fou_add_req *req); - -/* ============== FOU_CMD_DEL ============== */ -/* FOU_CMD_DEL - do */ -struct fou_del_req { - struct { - __u32 af:1; - __u32 ifindex:1; - __u32 port:1; - __u32 peer_port:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - } _present; - - __u8 af; - __s32 ifindex; - __u16 port /* big-endian */; - __u16 peer_port /* big-endian */; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; -}; - -static inline struct fou_del_req *fou_del_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_del_req)); -} -void fou_del_req_free(struct fou_del_req *req); - -static inline void fou_del_req_set_af(struct fou_del_req *req, __u8 af) -{ - req->_present.af = 1; - req->af = af; -} -static inline void -fou_del_req_set_ifindex(struct fou_del_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -fou_del_req_set_port(struct fou_del_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_del_req_set_peer_port(struct fou_del_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_del_req_set_local_v4(struct fou_del_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_del_req_set_peer_v4(struct fou_del_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void -fou_del_req_set_local_v6(struct fou_del_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_del_req_set_peer_v6(struct fou_del_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} - -/* - * Delete port. - */ -int fou_del(struct ynl_sock *ys, struct fou_del_req *req); - -/* ============== FOU_CMD_GET ============== */ -/* FOU_CMD_GET - do */ -struct fou_get_req { - struct { - __u32 af:1; - __u32 ifindex:1; - __u32 port:1; - __u32 peer_port:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - } _present; - - __u8 af; - __s32 ifindex; - __u16 port /* big-endian */; - __u16 peer_port /* big-endian */; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; -}; - -static inline struct fou_get_req *fou_get_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_get_req)); -} -void fou_get_req_free(struct fou_get_req *req); - -static inline void fou_get_req_set_af(struct fou_get_req *req, __u8 af) -{ - req->_present.af = 1; - req->af = af; -} -static inline void -fou_get_req_set_ifindex(struct fou_get_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -fou_get_req_set_port(struct fou_get_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_get_req_set_peer_port(struct fou_get_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_get_req_set_local_v4(struct fou_get_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_get_req_set_peer_v4(struct fou_get_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void -fou_get_req_set_local_v6(struct fou_get_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_get_req_set_peer_v6(struct fou_get_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} - -struct fou_get_rsp { - struct { - __u32 port:1; - __u32 ipproto:1; - __u32 type:1; - __u32 remcsum_nopartial:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - __u32 peer_port:1; - __u32 ifindex:1; - } _present; - - __u16 port /* big-endian */; - __u8 ipproto; - __u8 type; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; - __u16 peer_port /* big-endian */; - __s32 ifindex; -}; - -void fou_get_rsp_free(struct fou_get_rsp *rsp); - -/* - * Get tunnel info. - */ -struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req); - -/* FOU_CMD_GET - dump */ -struct fou_get_list { - struct fou_get_list *next; - struct fou_get_rsp obj __attribute__((aligned(8))); -}; - -void fou_get_list_free(struct fou_get_list *rsp); - -struct fou_get_list *fou_get_dump(struct ynl_sock *ys); - -#endif /* _LINUX_FOU_GEN_H */ diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c deleted file mode 100644 index 6901f8462cca..000000000000 --- a/tools/net/ynl/generated/handshake-user.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/handshake.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "handshake-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const handshake_op_strmap[] = { - [HANDSHAKE_CMD_READY] = "ready", - [HANDSHAKE_CMD_ACCEPT] = "accept", - [HANDSHAKE_CMD_DONE] = "done", -}; - -const char *handshake_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(handshake_op_strmap)) - return NULL; - return handshake_op_strmap[op]; -} - -static const char * const handshake_handler_class_strmap[] = { - [0] = "none", - [1] = "tlshd", - [2] = "max", -}; - -const char *handshake_handler_class_str(enum handshake_handler_class value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_handler_class_strmap)) - return NULL; - return handshake_handler_class_strmap[value]; -} - -static const char * const handshake_msg_type_strmap[] = { - [0] = "unspec", - [1] = "clienthello", - [2] = "serverhello", -}; - -const char *handshake_msg_type_str(enum handshake_msg_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_msg_type_strmap)) - return NULL; - return handshake_msg_type_strmap[value]; -} - -static const char * const handshake_auth_strmap[] = { - [0] = "unspec", - [1] = "unauth", - [2] = "psk", - [3] = "x509", -}; - -const char *handshake_auth_str(enum handshake_auth value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_auth_strmap)) - return NULL; - return handshake_auth_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr handshake_x509_policy[HANDSHAKE_A_X509_MAX + 1] = { - [HANDSHAKE_A_X509_CERT] = { .name = "cert", .type = YNL_PT_U32, }, - [HANDSHAKE_A_X509_PRIVKEY] = { .name = "privkey", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest handshake_x509_nest = { - .max_attr = HANDSHAKE_A_X509_MAX, - .table = handshake_x509_policy, -}; - -struct ynl_policy_attr handshake_accept_policy[HANDSHAKE_A_ACCEPT_MAX + 1] = { - [HANDSHAKE_A_ACCEPT_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = { .name = "handler-class", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_MESSAGE_TYPE] = { .name = "message-type", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_TIMEOUT] = { .name = "timeout", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_AUTH_MODE] = { .name = "auth-mode", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_PEER_IDENTITY] = { .name = "peer-identity", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_CERTIFICATE] = { .name = "certificate", .type = YNL_PT_NEST, .nest = &handshake_x509_nest, }, - [HANDSHAKE_A_ACCEPT_PEERNAME] = { .name = "peername", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest handshake_accept_nest = { - .max_attr = HANDSHAKE_A_ACCEPT_MAX, - .table = handshake_accept_policy, -}; - -struct ynl_policy_attr handshake_done_policy[HANDSHAKE_A_DONE_MAX + 1] = { - [HANDSHAKE_A_DONE_STATUS] = { .name = "status", .type = YNL_PT_U32, }, - [HANDSHAKE_A_DONE_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, - [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .name = "remote-auth", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest handshake_done_nest = { - .max_attr = HANDSHAKE_A_DONE_MAX, - .table = handshake_done_policy, -}; - -/* Common nested types */ -void handshake_x509_free(struct handshake_x509 *obj) -{ -} - -int handshake_x509_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct handshake_x509 *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == HANDSHAKE_A_X509_CERT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cert = 1; - dst->cert = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_X509_PRIVKEY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.privkey = 1; - dst->privkey = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -/* ============== HANDSHAKE_CMD_ACCEPT ============== */ -/* HANDSHAKE_CMD_ACCEPT - do */ -void handshake_accept_req_free(struct handshake_accept_req *req) -{ - free(req); -} - -void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp) -{ - unsigned int i; - - free(rsp->peer_identity); - for (i = 0; i < rsp->n_certificate; i++) - handshake_x509_free(&rsp->certificate[i]); - free(rsp->certificate); - free(rsp->peername); - free(rsp); -} - -int handshake_accept_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct handshake_accept_rsp *dst; - unsigned int n_peer_identity = 0; - unsigned int n_certificate = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - dst = yarg->data; - parg.ys = yarg->ys; - - if (dst->certificate) - return ynl_error_parse(yarg, "attribute already present (accept.certificate)"); - if (dst->peer_identity) - return ynl_error_parse(yarg, "attribute already present (accept.peer-identity)"); - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == HANDSHAKE_A_ACCEPT_SOCKFD) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sockfd = 1; - dst->sockfd = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_MESSAGE_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.message_type = 1; - dst->message_type = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_TIMEOUT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.timeout = 1; - dst->timeout = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_AUTH_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.auth_mode = 1; - dst->auth_mode = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { - n_peer_identity++; - } else if (type == HANDSHAKE_A_ACCEPT_CERTIFICATE) { - n_certificate++; - } else if (type == HANDSHAKE_A_ACCEPT_PEERNAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.peername_len = len; - dst->peername = malloc(len + 1); - memcpy(dst->peername, mnl_attr_get_str(attr), len); - dst->peername[len] = 0; - } - } - - if (n_certificate) { - dst->certificate = calloc(n_certificate, sizeof(*dst->certificate)); - dst->n_certificate = n_certificate; - i = 0; - parg.rsp_policy = &handshake_x509_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) { - parg.data = &dst->certificate[i]; - if (handshake_x509_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_peer_identity) { - dst->peer_identity = calloc(n_peer_identity, sizeof(*dst->peer_identity)); - dst->n_peer_identity = n_peer_identity; - i = 0; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { - dst->peer_identity[i] = mnl_attr_get_u32(attr); - i++; - } - } - } - - return MNL_CB_OK; -} - -struct handshake_accept_rsp * -handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct handshake_accept_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_ACCEPT, 1); - ys->req_policy = &handshake_accept_nest; - yrs.yarg.rsp_policy = &handshake_accept_nest; - - if (req->_present.handler_class) - mnl_attr_put_u32(nlh, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, req->handler_class); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = handshake_accept_rsp_parse; - yrs.rsp_cmd = HANDSHAKE_CMD_ACCEPT; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - handshake_accept_rsp_free(rsp); - return NULL; -} - -/* HANDSHAKE_CMD_ACCEPT - notify */ -void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp) -{ - unsigned int i; - - free(rsp->obj.peer_identity); - for (i = 0; i < rsp->obj.n_certificate; i++) - handshake_x509_free(&rsp->obj.certificate[i]); - free(rsp->obj.certificate); - free(rsp->obj.peername); - free(rsp); -} - -/* ============== HANDSHAKE_CMD_DONE ============== */ -/* HANDSHAKE_CMD_DONE - do */ -void handshake_done_req_free(struct handshake_done_req *req) -{ - free(req->remote_auth); - free(req); -} - -int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_DONE, 1); - ys->req_policy = &handshake_done_nest; - - if (req->_present.status) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_STATUS, req->status); - if (req->_present.sockfd) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_SOCKFD, req->sockfd); - for (unsigned int i = 0; i < req->n_remote_auth; i++) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -static const struct ynl_ntf_info handshake_ntf_info[] = { - [HANDSHAKE_CMD_READY] = { - .alloc_sz = sizeof(struct handshake_accept_ntf), - .cb = handshake_accept_rsp_parse, - .policy = &handshake_accept_nest, - .free = (void *)handshake_accept_ntf_free, - }, -}; - -const struct ynl_family ynl_handshake_family = { - .name = "handshake", - .ntf_info = handshake_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(handshake_ntf_info), -}; diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h deleted file mode 100644 index bce537d8b8cc..000000000000 --- a/tools/net/ynl/generated/handshake-user.h +++ /dev/null @@ -1,145 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/handshake.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_HANDSHAKE_GEN_H -#define _LINUX_HANDSHAKE_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_handshake_family; - -/* Enums */ -const char *handshake_op_str(int op); -const char *handshake_handler_class_str(enum handshake_handler_class value); -const char *handshake_msg_type_str(enum handshake_msg_type value); -const char *handshake_auth_str(enum handshake_auth value); - -/* Common nested types */ -struct handshake_x509 { - struct { - __u32 cert:1; - __u32 privkey:1; - } _present; - - __s32 cert; - __s32 privkey; -}; - -/* ============== HANDSHAKE_CMD_ACCEPT ============== */ -/* HANDSHAKE_CMD_ACCEPT - do */ -struct handshake_accept_req { - struct { - __u32 handler_class:1; - } _present; - - enum handshake_handler_class handler_class; -}; - -static inline struct handshake_accept_req *handshake_accept_req_alloc(void) -{ - return calloc(1, sizeof(struct handshake_accept_req)); -} -void handshake_accept_req_free(struct handshake_accept_req *req); - -static inline void -handshake_accept_req_set_handler_class(struct handshake_accept_req *req, - enum handshake_handler_class handler_class) -{ - req->_present.handler_class = 1; - req->handler_class = handler_class; -} - -struct handshake_accept_rsp { - struct { - __u32 sockfd:1; - __u32 message_type:1; - __u32 timeout:1; - __u32 auth_mode:1; - __u32 peername_len; - } _present; - - __s32 sockfd; - enum handshake_msg_type message_type; - __u32 timeout; - enum handshake_auth auth_mode; - unsigned int n_peer_identity; - __u32 *peer_identity; - unsigned int n_certificate; - struct handshake_x509 *certificate; - char *peername; -}; - -void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp); - -/* - * Handler retrieves next queued handshake request - */ -struct handshake_accept_rsp * -handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req); - -/* HANDSHAKE_CMD_ACCEPT - notify */ -struct handshake_accept_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct handshake_accept_ntf *ntf); - struct handshake_accept_rsp obj __attribute__((aligned(8))); -}; - -void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp); - -/* ============== HANDSHAKE_CMD_DONE ============== */ -/* HANDSHAKE_CMD_DONE - do */ -struct handshake_done_req { - struct { - __u32 status:1; - __u32 sockfd:1; - } _present; - - __u32 status; - __s32 sockfd; - unsigned int n_remote_auth; - __u32 *remote_auth; -}; - -static inline struct handshake_done_req *handshake_done_req_alloc(void) -{ - return calloc(1, sizeof(struct handshake_done_req)); -} -void handshake_done_req_free(struct handshake_done_req *req); - -static inline void -handshake_done_req_set_status(struct handshake_done_req *req, __u32 status) -{ - req->_present.status = 1; - req->status = status; -} -static inline void -handshake_done_req_set_sockfd(struct handshake_done_req *req, __s32 sockfd) -{ - req->_present.sockfd = 1; - req->sockfd = sockfd; -} -static inline void -__handshake_done_req_set_remote_auth(struct handshake_done_req *req, - __u32 *remote_auth, - unsigned int n_remote_auth) -{ - free(req->remote_auth); - req->remote_auth = remote_auth; - req->n_remote_auth = n_remote_auth; -} - -/* - * Handler reports handshake completion - */ -int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req); - -#endif /* _LINUX_HANDSHAKE_GEN_H */ diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c deleted file mode 100644 index ed8bcb855a1d..000000000000 --- a/tools/net/ynl/generated/netdev-user.c +++ /dev/null @@ -1,952 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/netdev.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "netdev-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const netdev_op_strmap[] = { - [NETDEV_CMD_DEV_GET] = "dev-get", - [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf", - [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf", - [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf", - [NETDEV_CMD_PAGE_POOL_GET] = "page-pool-get", - [NETDEV_CMD_PAGE_POOL_ADD_NTF] = "page-pool-add-ntf", - [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", - [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", - [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", - [NETDEV_CMD_QUEUE_GET] = "queue-get", - [NETDEV_CMD_NAPI_GET] = "napi-get", -}; - -const char *netdev_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(netdev_op_strmap)) - return NULL; - return netdev_op_strmap[op]; -} - -static const char * const netdev_xdp_act_strmap[] = { - [0] = "basic", - [1] = "redirect", - [2] = "ndo-xmit", - [3] = "xsk-zerocopy", - [4] = "hw-offload", - [5] = "rx-sg", - [6] = "ndo-xmit-sg", -}; - -const char *netdev_xdp_act_str(enum netdev_xdp_act value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_act_strmap)) - return NULL; - return netdev_xdp_act_strmap[value]; -} - -static const char * const netdev_xdp_rx_metadata_strmap[] = { - [0] = "timestamp", - [1] = "hash", -}; - -const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_rx_metadata_strmap)) - return NULL; - return netdev_xdp_rx_metadata_strmap[value]; -} - -static const char * const netdev_xsk_flags_strmap[] = { - [0] = "tx-timestamp", - [1] = "tx-checksum", -}; - -const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xsk_flags_strmap)) - return NULL; - return netdev_xsk_flags_strmap[value]; -} - -static const char * const netdev_queue_type_strmap[] = { - [0] = "rx", - [1] = "tx", -}; - -const char *netdev_queue_type_str(enum netdev_queue_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_queue_type_strmap)) - return NULL; - return netdev_queue_type_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { - [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_page_pool_info_nest = { - .max_attr = NETDEV_A_PAGE_POOL_MAX, - .table = netdev_page_pool_info_policy, -}; - -struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { - [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, - [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, - [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, - [NETDEV_A_DEV_XSK_FEATURES] = { .name = "xsk-features", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest netdev_dev_nest = { - .max_attr = NETDEV_A_DEV_MAX, - .table = netdev_dev_policy, -}; - -struct ynl_policy_attr netdev_page_pool_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { - [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_PAGE_POOL_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_INFLIGHT] = { .name = "inflight", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_INFLIGHT_MEM] = { .name = "inflight-mem", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_DETACH_TIME] = { .name = "detach-time", .type = YNL_PT_UINT, }, -}; - -struct ynl_policy_nest netdev_page_pool_nest = { - .max_attr = NETDEV_A_PAGE_POOL_MAX, - .table = netdev_page_pool_policy, -}; - -struct ynl_policy_attr netdev_page_pool_stats_policy[NETDEV_A_PAGE_POOL_STATS_MAX + 1] = { - [NETDEV_A_PAGE_POOL_STATS_INFO] = { .name = "info", .type = YNL_PT_NEST, .nest = &netdev_page_pool_info_nest, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST] = { .name = "alloc-fast", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW] = { .name = "alloc-slow", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER] = { .name = "alloc-slow-high-order", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY] = { .name = "alloc-empty", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL] = { .name = "alloc-refill", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE] = { .name = "alloc-waive", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED] = { .name = "recycle-cached", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL] = { .name = "recycle-cache-full", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING] = { .name = "recycle-ring", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL] = { .name = "recycle-ring-full", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT] = { .name = "recycle-released-refcnt", .type = YNL_PT_UINT, }, -}; - -struct ynl_policy_nest netdev_page_pool_stats_nest = { - .max_attr = NETDEV_A_PAGE_POOL_STATS_MAX, - .table = netdev_page_pool_stats_policy, -}; - -struct ynl_policy_attr netdev_queue_policy[NETDEV_A_QUEUE_MAX + 1] = { - [NETDEV_A_QUEUE_ID] = { .name = "id", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_TYPE] = { .name = "type", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_queue_nest = { - .max_attr = NETDEV_A_QUEUE_MAX, - .table = netdev_queue_policy, -}; - -struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { - [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_PID] = { .name = "pid", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_napi_nest = { - .max_attr = NETDEV_A_NAPI_MAX, - .table = netdev_napi_policy, -}; - -/* Common nested types */ -void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) -{ -} - -int netdev_page_pool_info_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct netdev_page_pool_info *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.id) - mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, obj->id); - if (obj->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_PAGE_POOL_IFINDEX, obj->ifindex); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct netdev_page_pool_info *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.irq = 1; - dst->irq = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_PID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pid = 1; - dst->pid = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -/* ============== NETDEV_CMD_DEV_GET ============== */ -/* NETDEV_CMD_DEV_GET - do */ -void netdev_dev_get_req_free(struct netdev_dev_get_req *req) -{ - free(req); -} - -void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_dev_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_DEV_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_DEV_XDP_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_features = 1; - dst->xdp_features = mnl_attr_get_u64(attr); - } else if (type == NETDEV_A_DEV_XDP_ZC_MAX_SEGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_zc_max_segs = 1; - dst->xdp_zc_max_segs = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_DEV_XDP_RX_METADATA_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_rx_metadata_features = 1; - dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); - } else if (type == NETDEV_A_DEV_XSK_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xsk_features = 1; - dst->xsk_features = mnl_attr_get_u64(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_dev_get_rsp * -netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_dev_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); - ys->req_policy = &netdev_dev_nest; - yrs.yarg.rsp_policy = &netdev_dev_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_DEV_IFINDEX, req->ifindex); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_dev_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_DEV_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_dev_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_DEV_GET - dump */ -void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp) -{ - struct netdev_dev_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_dev_get_list); - yds.cb = netdev_dev_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_DEV_GET; - yds.rsp_policy = &netdev_dev_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_dev_get_list_free(yds.first); - return NULL; -} - -/* NETDEV_CMD_DEV_GET - notify */ -void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp) -{ - free(rsp); -} - -/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_GET - do */ -void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req) -{ - free(req); -} - -void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_page_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct netdev_page_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_PAGE_POOL_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.napi_id = 1; - dst->napi_id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.inflight = 1; - dst->inflight = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT_MEM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.inflight_mem = 1; - dst->inflight_mem = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_DETACH_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.detach_time = 1; - dst->detach_time = mnl_attr_get_uint(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_page_pool_get_rsp * -netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_page_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); - ys->req_policy = &netdev_page_pool_nest; - yrs.yarg.rsp_policy = &netdev_page_pool_nest; - - if (req->_present.id) - mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_page_pool_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_page_pool_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_GET - dump */ -void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp) -{ - struct netdev_page_pool_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_page_pool_get_list * -netdev_page_pool_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_page_pool_get_list); - yds.cb = netdev_page_pool_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; - yds.rsp_policy = &netdev_page_pool_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_page_pool_get_list_free(yds.first); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_GET - notify */ -void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp) -{ - free(rsp); -} - -/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ -void -netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req) -{ - netdev_page_pool_info_free(&req->info); - free(req); -} - -void -netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp) -{ - netdev_page_pool_info_free(&rsp->info); - free(rsp); -} - -int netdev_page_pool_stats_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct netdev_page_pool_stats_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_STATS_INFO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.info = 1; - - parg.rsp_policy = &netdev_page_pool_info_nest; - parg.data = &dst->info; - if (netdev_page_pool_info_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_fast = 1; - dst->alloc_fast = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_slow = 1; - dst->alloc_slow = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_slow_high_order = 1; - dst->alloc_slow_high_order = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_empty = 1; - dst->alloc_empty = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_refill = 1; - dst->alloc_refill = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_waive = 1; - dst->alloc_waive = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_cached = 1; - dst->recycle_cached = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_cache_full = 1; - dst->recycle_cache_full = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_ring = 1; - dst->recycle_ring = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_ring_full = 1; - dst->recycle_ring_full = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_released_refcnt = 1; - dst->recycle_released_refcnt = mnl_attr_get_uint(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_page_pool_stats_get_rsp * -netdev_page_pool_stats_get(struct ynl_sock *ys, - struct netdev_page_pool_stats_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_page_pool_stats_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); - ys->req_policy = &netdev_page_pool_stats_nest; - yrs.yarg.rsp_policy = &netdev_page_pool_stats_nest; - - if (req->_present.info) - netdev_page_pool_info_put(nlh, NETDEV_A_PAGE_POOL_STATS_INFO, &req->info); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_page_pool_stats_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_page_pool_stats_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ -void -netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp) -{ - struct netdev_page_pool_stats_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - netdev_page_pool_info_free(&rsp->obj.info); - free(rsp); - } -} - -struct netdev_page_pool_stats_get_list * -netdev_page_pool_stats_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_page_pool_stats_get_list); - yds.cb = netdev_page_pool_stats_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; - yds.rsp_policy = &netdev_page_pool_stats_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_page_pool_stats_get_list_free(yds.first); - return NULL; -} - -/* ============== NETDEV_CMD_QUEUE_GET ============== */ -/* NETDEV_CMD_QUEUE_GET - do */ -void netdev_queue_get_req_free(struct netdev_queue_get_req *req) -{ - free(req); -} - -void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_queue_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_queue_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_QUEUE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_QUEUE_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_QUEUE_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.napi_id = 1; - dst->napi_id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_QUEUE_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_queue_get_rsp * -netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_queue_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); - ys->req_policy = &netdev_queue_nest; - yrs.yarg.rsp_policy = &netdev_queue_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); - if (req->_present.type) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, req->type); - if (req->_present.id) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_queue_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_QUEUE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_queue_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_QUEUE_GET - dump */ -void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp) -{ - struct netdev_queue_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_queue_get_list * -netdev_queue_get_dump(struct ynl_sock *ys, - struct netdev_queue_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_queue_get_list); - yds.cb = netdev_queue_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_QUEUE_GET; - yds.rsp_policy = &netdev_queue_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); - ys->req_policy = &netdev_queue_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_queue_get_list_free(yds.first); - return NULL; -} - -/* ============== NETDEV_CMD_NAPI_GET ============== */ -/* NETDEV_CMD_NAPI_GET - do */ -void netdev_napi_get_req_free(struct netdev_napi_get_req *req) -{ - free(req); -} - -void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_napi_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_napi_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_napi_get_rsp * -netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_napi_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); - ys->req_policy = &netdev_napi_nest; - yrs.yarg.rsp_policy = &netdev_napi_nest; - - if (req->_present.id) - mnl_attr_put_u32(nlh, NETDEV_A_NAPI_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_napi_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_NAPI_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_napi_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_NAPI_GET - dump */ -void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp) -{ - struct netdev_napi_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_napi_get_list * -netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_napi_get_list); - yds.cb = netdev_napi_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_NAPI_GET; - yds.rsp_policy = &netdev_napi_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); - ys->req_policy = &netdev_napi_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_NAPI_IFINDEX, req->ifindex); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_napi_get_list_free(yds.first); - return NULL; -} - -static const struct ynl_ntf_info netdev_ntf_info[] = { - [NETDEV_CMD_DEV_ADD_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_DEV_DEL_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_DEV_CHANGE_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_ADD_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_DEL_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, -}; - -const struct ynl_family ynl_netdev_family = { - .name = "netdev", - .ntf_info = netdev_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(netdev_ntf_info), -}; diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h deleted file mode 100644 index 3830cf2ab6b8..000000000000 --- a/tools/net/ynl/generated/netdev-user.h +++ /dev/null @@ -1,442 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/netdev.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_NETDEV_GEN_H -#define _LINUX_NETDEV_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_netdev_family; - -/* Enums */ -const char *netdev_op_str(int op); -const char *netdev_xdp_act_str(enum netdev_xdp_act value); -const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); -const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); -const char *netdev_queue_type_str(enum netdev_queue_type value); - -/* Common nested types */ -struct netdev_page_pool_info { - struct { - __u32 id:1; - __u32 ifindex:1; - } _present; - - __u64 id; - __u32 ifindex; -}; - -/* ============== NETDEV_CMD_DEV_GET ============== */ -/* NETDEV_CMD_DEV_GET - do */ -struct netdev_dev_get_req { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_dev_get_req *netdev_dev_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_dev_get_req)); -} -void netdev_dev_get_req_free(struct netdev_dev_get_req *req); - -static inline void -netdev_dev_get_req_set_ifindex(struct netdev_dev_get_req *req, __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_dev_get_rsp { - struct { - __u32 ifindex:1; - __u32 xdp_features:1; - __u32 xdp_zc_max_segs:1; - __u32 xdp_rx_metadata_features:1; - __u32 xsk_features:1; - } _present; - - __u32 ifindex; - __u64 xdp_features; - __u32 xdp_zc_max_segs; - __u64 xdp_rx_metadata_features; - __u64 xsk_features; -}; - -void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); - -/* - * Get / dump information about a netdev. - */ -struct netdev_dev_get_rsp * -netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req); - -/* NETDEV_CMD_DEV_GET - dump */ -struct netdev_dev_get_list { - struct netdev_dev_get_list *next; - struct netdev_dev_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp); - -struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys); - -/* NETDEV_CMD_DEV_GET - notify */ -struct netdev_dev_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct netdev_dev_get_ntf *ntf); - struct netdev_dev_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp); - -/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_GET - do */ -struct netdev_page_pool_get_req { - struct { - __u32 id:1; - } _present; - - __u64 id; -}; - -static inline struct netdev_page_pool_get_req * -netdev_page_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_page_pool_get_req)); -} -void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req); - -static inline void -netdev_page_pool_get_req_set_id(struct netdev_page_pool_get_req *req, __u64 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_page_pool_get_rsp { - struct { - __u32 id:1; - __u32 ifindex:1; - __u32 napi_id:1; - __u32 inflight:1; - __u32 inflight_mem:1; - __u32 detach_time:1; - } _present; - - __u64 id; - __u32 ifindex; - __u64 napi_id; - __u64 inflight; - __u64 inflight_mem; - __u64 detach_time; -}; - -void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp); - -/* - * Get / dump information about Page Pools. -(Only Page Pools associated with a net_device can be listed.) - - */ -struct netdev_page_pool_get_rsp * -netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req); - -/* NETDEV_CMD_PAGE_POOL_GET - dump */ -struct netdev_page_pool_get_list { - struct netdev_page_pool_get_list *next; - struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp); - -struct netdev_page_pool_get_list * -netdev_page_pool_get_dump(struct ynl_sock *ys); - -/* NETDEV_CMD_PAGE_POOL_GET - notify */ -struct netdev_page_pool_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct netdev_page_pool_get_ntf *ntf); - struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp); - -/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ -struct netdev_page_pool_stats_get_req { - struct { - __u32 info:1; - } _present; - - struct netdev_page_pool_info info; -}; - -static inline struct netdev_page_pool_stats_get_req * -netdev_page_pool_stats_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_page_pool_stats_get_req)); -} -void -netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req); - -static inline void -netdev_page_pool_stats_get_req_set_info_id(struct netdev_page_pool_stats_get_req *req, - __u64 id) -{ - req->_present.info = 1; - req->info._present.id = 1; - req->info.id = id; -} -static inline void -netdev_page_pool_stats_get_req_set_info_ifindex(struct netdev_page_pool_stats_get_req *req, - __u32 ifindex) -{ - req->_present.info = 1; - req->info._present.ifindex = 1; - req->info.ifindex = ifindex; -} - -struct netdev_page_pool_stats_get_rsp { - struct { - __u32 info:1; - __u32 alloc_fast:1; - __u32 alloc_slow:1; - __u32 alloc_slow_high_order:1; - __u32 alloc_empty:1; - __u32 alloc_refill:1; - __u32 alloc_waive:1; - __u32 recycle_cached:1; - __u32 recycle_cache_full:1; - __u32 recycle_ring:1; - __u32 recycle_ring_full:1; - __u32 recycle_released_refcnt:1; - } _present; - - struct netdev_page_pool_info info; - __u64 alloc_fast; - __u64 alloc_slow; - __u64 alloc_slow_high_order; - __u64 alloc_empty; - __u64 alloc_refill; - __u64 alloc_waive; - __u64 recycle_cached; - __u64 recycle_cache_full; - __u64 recycle_ring; - __u64 recycle_ring_full; - __u64 recycle_released_refcnt; -}; - -void -netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp); - -/* - * Get page pool statistics. - */ -struct netdev_page_pool_stats_get_rsp * -netdev_page_pool_stats_get(struct ynl_sock *ys, - struct netdev_page_pool_stats_get_req *req); - -/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ -struct netdev_page_pool_stats_get_list { - struct netdev_page_pool_stats_get_list *next; - struct netdev_page_pool_stats_get_rsp obj __attribute__((aligned(8))); -}; - -void -netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp); - -struct netdev_page_pool_stats_get_list * -netdev_page_pool_stats_get_dump(struct ynl_sock *ys); - -/* ============== NETDEV_CMD_QUEUE_GET ============== */ -/* NETDEV_CMD_QUEUE_GET - do */ -struct netdev_queue_get_req { - struct { - __u32 ifindex:1; - __u32 type:1; - __u32 id:1; - } _present; - - __u32 ifindex; - enum netdev_queue_type type; - __u32 id; -}; - -static inline struct netdev_queue_get_req *netdev_queue_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_queue_get_req)); -} -void netdev_queue_get_req_free(struct netdev_queue_get_req *req); - -static inline void -netdev_queue_get_req_set_ifindex(struct netdev_queue_get_req *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -netdev_queue_get_req_set_type(struct netdev_queue_get_req *req, - enum netdev_queue_type type) -{ - req->_present.type = 1; - req->type = type; -} -static inline void -netdev_queue_get_req_set_id(struct netdev_queue_get_req *req, __u32 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_queue_get_rsp { - struct { - __u32 id:1; - __u32 type:1; - __u32 napi_id:1; - __u32 ifindex:1; - } _present; - - __u32 id; - enum netdev_queue_type type; - __u32 napi_id; - __u32 ifindex; -}; - -void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp); - -/* - * Get queue information from the kernel. Only configured queues will be reported (as opposed to all available hardware queues). - */ -struct netdev_queue_get_rsp * -netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req); - -/* NETDEV_CMD_QUEUE_GET - dump */ -struct netdev_queue_get_req_dump { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_queue_get_req_dump * -netdev_queue_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct netdev_queue_get_req_dump)); -} -void netdev_queue_get_req_dump_free(struct netdev_queue_get_req_dump *req); - -static inline void -netdev_queue_get_req_dump_set_ifindex(struct netdev_queue_get_req_dump *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_queue_get_list { - struct netdev_queue_get_list *next; - struct netdev_queue_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp); - -struct netdev_queue_get_list * -netdev_queue_get_dump(struct ynl_sock *ys, - struct netdev_queue_get_req_dump *req); - -/* ============== NETDEV_CMD_NAPI_GET ============== */ -/* NETDEV_CMD_NAPI_GET - do */ -struct netdev_napi_get_req { - struct { - __u32 id:1; - } _present; - - __u32 id; -}; - -static inline struct netdev_napi_get_req *netdev_napi_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_napi_get_req)); -} -void netdev_napi_get_req_free(struct netdev_napi_get_req *req); - -static inline void -netdev_napi_get_req_set_id(struct netdev_napi_get_req *req, __u32 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_napi_get_rsp { - struct { - __u32 id:1; - __u32 ifindex:1; - __u32 irq:1; - __u32 pid:1; - } _present; - - __u32 id; - __u32 ifindex; - __u32 irq; - __u32 pid; -}; - -void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); - -/* - * Get information about NAPI instances configured on the system. - */ -struct netdev_napi_get_rsp * -netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req); - -/* NETDEV_CMD_NAPI_GET - dump */ -struct netdev_napi_get_req_dump { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_napi_get_req_dump * -netdev_napi_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct netdev_napi_get_req_dump)); -} -void netdev_napi_get_req_dump_free(struct netdev_napi_get_req_dump *req); - -static inline void -netdev_napi_get_req_dump_set_ifindex(struct netdev_napi_get_req_dump *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_napi_get_list { - struct netdev_napi_get_list *next; - struct netdev_napi_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp); - -struct netdev_napi_get_list * -netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req); - -#endif /* _LINUX_NETDEV_GEN_H */ diff --git a/tools/net/ynl/generated/nfsd-user.c b/tools/net/ynl/generated/nfsd-user.c deleted file mode 100644 index 360b6448c6e9..000000000000 --- a/tools/net/ynl/generated/nfsd-user.c +++ /dev/null @@ -1,203 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/nfsd.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "nfsd-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const nfsd_op_strmap[] = { - [NFSD_CMD_RPC_STATUS_GET] = "rpc-status-get", -}; - -const char *nfsd_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(nfsd_op_strmap)) - return NULL; - return nfsd_op_strmap[op]; -} - -/* Policies */ -struct ynl_policy_attr nfsd_rpc_status_policy[NFSD_A_RPC_STATUS_MAX + 1] = { - [NFSD_A_RPC_STATUS_XID] = { .name = "xid", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_FLAGS] = { .name = "flags", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_PROG] = { .name = "prog", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_VERSION] = { .name = "version", .type = YNL_PT_U8, }, - [NFSD_A_RPC_STATUS_PROC] = { .name = "proc", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_SERVICE_TIME] = { .name = "service_time", .type = YNL_PT_U64, }, - [NFSD_A_RPC_STATUS_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [NFSD_A_RPC_STATUS_SADDR4] = { .name = "saddr4", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_DADDR4] = { .name = "daddr4", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_SADDR6] = { .name = "saddr6", .type = YNL_PT_BINARY,}, - [NFSD_A_RPC_STATUS_DADDR6] = { .name = "daddr6", .type = YNL_PT_BINARY,}, - [NFSD_A_RPC_STATUS_SPORT] = { .name = "sport", .type = YNL_PT_U16, }, - [NFSD_A_RPC_STATUS_DPORT] = { .name = "dport", .type = YNL_PT_U16, }, - [NFSD_A_RPC_STATUS_COMPOUND_OPS] = { .name = "compound-ops", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest nfsd_rpc_status_nest = { - .max_attr = NFSD_A_RPC_STATUS_MAX, - .table = nfsd_rpc_status_policy, -}; - -/* Common nested types */ -/* ============== NFSD_CMD_RPC_STATUS_GET ============== */ -/* NFSD_CMD_RPC_STATUS_GET - dump */ -int nfsd_rpc_status_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct nfsd_rpc_status_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - unsigned int n_compound_ops = 0; - const struct nlattr *attr; - int i; - - dst = yarg->data; - - if (dst->compound_ops) - return ynl_error_parse(yarg, "attribute already present (rpc-status.compound-ops)"); - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NFSD_A_RPC_STATUS_XID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xid = 1; - dst->xid = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - dst->flags = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_PROG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.prog = 1; - dst->prog = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u8(attr); - } else if (type == NFSD_A_RPC_STATUS_PROC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.proc = 1; - dst->proc = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_SERVICE_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.service_time = 1; - dst->service_time = mnl_attr_get_u64(attr); - } else if (type == NFSD_A_RPC_STATUS_SADDR4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.saddr4 = 1; - dst->saddr4 = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_DADDR4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.daddr4 = 1; - dst->daddr4 = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_SADDR6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.saddr6_len = len; - dst->saddr6 = malloc(len); - memcpy(dst->saddr6, mnl_attr_get_payload(attr), len); - } else if (type == NFSD_A_RPC_STATUS_DADDR6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.daddr6_len = len; - dst->daddr6 = malloc(len); - memcpy(dst->daddr6, mnl_attr_get_payload(attr), len); - } else if (type == NFSD_A_RPC_STATUS_SPORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sport = 1; - dst->sport = mnl_attr_get_u16(attr); - } else if (type == NFSD_A_RPC_STATUS_DPORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dport = 1; - dst->dport = mnl_attr_get_u16(attr); - } else if (type == NFSD_A_RPC_STATUS_COMPOUND_OPS) { - n_compound_ops++; - } - } - - if (n_compound_ops) { - dst->compound_ops = calloc(n_compound_ops, sizeof(*dst->compound_ops)); - dst->n_compound_ops = n_compound_ops; - i = 0; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == NFSD_A_RPC_STATUS_COMPOUND_OPS) { - dst->compound_ops[i] = mnl_attr_get_u32(attr); - i++; - } - } - } - - return MNL_CB_OK; -} - -void -nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp) -{ - struct nfsd_rpc_status_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.saddr6); - free(rsp->obj.daddr6); - free(rsp->obj.compound_ops); - free(rsp); - } -} - -struct nfsd_rpc_status_get_rsp_list * -nfsd_rpc_status_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_rsp_list); - yds.cb = nfsd_rpc_status_get_rsp_dump_parse; - yds.rsp_cmd = NFSD_CMD_RPC_STATUS_GET; - yds.rsp_policy = &nfsd_rpc_status_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NFSD_CMD_RPC_STATUS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - nfsd_rpc_status_get_rsp_list_free(yds.first); - return NULL; -} - -const struct ynl_family ynl_nfsd_family = { - .name = "nfsd", -}; diff --git a/tools/net/ynl/generated/nfsd-user.h b/tools/net/ynl/generated/nfsd-user.h deleted file mode 100644 index 989c6e209ced..000000000000 --- a/tools/net/ynl/generated/nfsd-user.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/nfsd.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_NFSD_GEN_H -#define _LINUX_NFSD_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_nfsd_family; - -/* Enums */ -const char *nfsd_op_str(int op); - -/* Common nested types */ -/* ============== NFSD_CMD_RPC_STATUS_GET ============== */ -/* NFSD_CMD_RPC_STATUS_GET - dump */ -struct nfsd_rpc_status_get_rsp_dump { - struct { - __u32 xid:1; - __u32 flags:1; - __u32 prog:1; - __u32 version:1; - __u32 proc:1; - __u32 service_time:1; - __u32 saddr4:1; - __u32 daddr4:1; - __u32 saddr6_len; - __u32 daddr6_len; - __u32 sport:1; - __u32 dport:1; - } _present; - - __u32 xid /* big-endian */; - __u32 flags; - __u32 prog; - __u8 version; - __u32 proc; - __s64 service_time; - __u32 saddr4 /* big-endian */; - __u32 daddr4 /* big-endian */; - void *saddr6; - void *daddr6; - __u16 sport /* big-endian */; - __u16 dport /* big-endian */; - unsigned int n_compound_ops; - __u32 *compound_ops; -}; - -struct nfsd_rpc_status_get_rsp_list { - struct nfsd_rpc_status_get_rsp_list *next; - struct nfsd_rpc_status_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void -nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp); - -struct nfsd_rpc_status_get_rsp_list * -nfsd_rpc_status_get_dump(struct ynl_sock *ys); - -#endif /* _LINUX_NFSD_GEN_H */ -- cgit v1.2.3 From 10a149e4b4a9187940adbfff0f216ccb5a15aa41 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:49 -0800 Subject: perf vendor events arm64 AmpereOne: Rename BPU_FLUSH_MEM_FAULT to GPC_FLUSH_MEM_FAULT The documentation wrongly called the event as BPU_FLUSH_MEM_FAULT and now has been fixed. Correct the name in the perf tool as well. Fixes: a9650b7f6fc09d16 ("perf vendor events arm64: Add AmpereOne core PMU events") Reviewed-by: Ian Rogers Signed-off-by: Ilkka Koskinen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ilkka Koskinen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20231201021550.1109196-3-ilkka@os.amperecomputing.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json index 88b23b85e33c..879ff21e0b17 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json @@ -110,7 +110,7 @@ { "PublicDescription": "Flushes due to memory hazards", "EventCode": "0x121", - "EventName": "BPU_FLUSH_MEM_FAULT", + "EventName": "GPC_FLUSH_MEM_FAULT", "BriefDescription": "Flushes due to memory hazards" }, { -- cgit v1.2.3 From 16438b652b464ef7d0a877d31e93ab54338f6b0a Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Thu, 30 Nov 2023 18:15:50 -0800 Subject: perf vendor events arm64 AmpereOneX: Add core PMU events and metrics Add JSON files for AmpereOneX core PMU events and metrics. Reviewed-by: Ian Rogers Signed-off-by: Ilkka Koskinen Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231201021550.1109196-4-ilkka@os.amperecomputing.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/ampere/ampereonex/branch.json | 125 ++++++ .../arch/arm64/ampere/ampereonex/bus.json | 20 + .../arch/arm64/ampere/ampereonex/cache.json | 206 +++++++++ .../arch/arm64/ampere/ampereonex/core-imp-def.json | 464 +++++++++++++++++++++ .../arch/arm64/ampere/ampereonex/exception.json | 47 +++ .../arch/arm64/ampere/ampereonex/instruction.json | 128 ++++++ .../arch/arm64/ampere/ampereonex/intrinsic.json | 14 + .../arch/arm64/ampere/ampereonex/memory.json | 41 ++ .../arch/arm64/ampere/ampereonex/metrics.json | 442 ++++++++++++++++++++ .../arch/arm64/ampere/ampereonex/mmu.json | 170 ++++++++ .../arch/arm64/ampere/ampereonex/pipeline.json | 41 ++ .../arch/arm64/ampere/ampereonex/spe.json | 14 + tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 13 files changed, 1713 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json create mode 100644 tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json new file mode 100644 index 000000000000..a632755fc086 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/branch.json @@ -0,0 +1,125 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + }, + { + "PublicDescription": "Instruction architecturally executed, branch not taken", + "EventCode": "0x8107", + "EventName": "BR_SKIP_RETIRED", + "BriefDescription": "Instruction architecturally executed, branch not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, immediate branch taken", + "EventCode": "0x8108", + "EventName": "BR_IMMED_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, immediate branch taken" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch excluding return retired", + "EventCode": "0x810c", + "EventName": "BR_INDNR_TAKEN_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch excluding return retired" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted immediate branch", + "EventCode": "0x8110", + "EventName": "BR_IMMED_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted immediate branch", + "EventCode": "0x8111", + "EventName": "BR_IMMED_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted immediate branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch", + "EventCode": "0x8112", + "EventName": "BR_IND_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch", + "EventCode": "0x8113", + "EventName": "BR_IND_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted procedure return", + "EventCode": "0x8114", + "EventName": "BR_RETURN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted procedure return", + "EventCode": "0x8115", + "EventName": "BR_RETURN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted procedure return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted indirect branch excluding return", + "EventCode": "0x8116", + "EventName": "BR_INDNR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return", + "EventCode": "0x8117", + "EventName": "BR_INDNR_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted indirect branch excluding return" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, taken", + "EventCode": "0x8118", + "EventName": "BR_TAKEN_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, taken", + "EventCode": "0x8119", + "EventName": "BR_TAKEN_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch, not taken", + "EventCode": "0x811a", + "EventName": "BR_SKIP_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch, not taken", + "EventCode": "0x811b", + "EventName": "BR_SKIP_MIS_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, mispredicted branch, not taken" + }, + { + "PublicDescription": "Instruction architecturally executed, predicted branch", + "EventCode": "0x811c", + "EventName": "BR_PRED_RETIRED", + "BriefDescription": "Instruction architecturally executed, predicted branch" + }, + { + "PublicDescription": "Instruction architecturally executed, indirect branch", + "EventCode": "0x811d", + "EventName": "BR_IND_RETIRED", + "BriefDescription": "Instruction architecturally executed, indirect branch" + }, + { + "PublicDescription": "Branch Record captured.", + "EventCode": "0x811f", + "EventName": "BRB_FILTRATE", + "BriefDescription": "Branch Record captured." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json new file mode 100644 index 000000000000..2aeb9907831d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json new file mode 100644 index 000000000000..c50d8e930b05 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/cache.json @@ -0,0 +1,206 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "PublicDescription": "Level 1 data or unified cache demand access", + "EventCode": "0x8140", + "EventName": "L1D_CACHE_RW", + "BriefDescription": "Level 1 data or unified cache demand access" + }, + { + "PublicDescription": "Level 1 data or unified cache preload or prefetch", + "EventCode": "0x8142", + "EventName": "L1D_CACHE_PRFM", + "BriefDescription": "Level 1 data or unified cache preload or prefetch" + }, + { + "PublicDescription": "Level 1 data or unified cache refill, preload or prefetch", + "EventCode": "0x8146", + "EventName": "L1D_CACHE_REFILL_PRFM", + "BriefDescription": "Level 1 data or unified cache refill, preload or prefetch" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "PublicDescription": "L1D TLB miss", + "EventCode": "0xD600", + "EventName": "L1D_TLB_MISS", + "BriefDescription": "L1D TLB miss" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch requests generated", + "EventCode": "0xd606", + "EventName": "L1_PREFETCH_LD_GEN", + "BriefDescription": "Level 1 prefetcher, load prefetch requests generated" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache", + "EventCode": "0xd607", + "EventName": "L1_PREFETCH_LD_FILL", + "BriefDescription": "Level 1 prefetcher, load prefetch fills into the level 1 cache" + }, + { + "PublicDescription": "Level 1 prefetcher, load prefetch to level 2 generated", + "EventCode": "0xd608", + "EventName": "L1_PREFETCH_L2_REQ", + "BriefDescription": "Level 1 prefetcher, load prefetch to level 2 generated" + }, + { + "PublicDescription": "L1 prefetcher, distance was reset", + "EventCode": "0xd609", + "EventName": "L1_PREFETCH_DIST_RST", + "BriefDescription": "L1 prefetcher, distance was reset" + }, + { + "PublicDescription": "L1 prefetcher, distance was increased", + "EventCode": "0xd60a", + "EventName": "L1_PREFETCH_DIST_INC", + "BriefDescription": "L1 prefetcher, distance was increased" + }, + { + "PublicDescription": "Level 1 prefetcher, table entry is trained", + "EventCode": "0xd60b", + "EventName": "L1_PREFETCH_ENTRY_TRAINED", + "BriefDescription": "Level 1 prefetcher, table entry is trained" + }, + { + "PublicDescription": "L1 data cache refill - Read or Write", + "EventCode": "0xd60e", + "EventName": "L1D_CACHE_REFILL_RW", + "BriefDescription": "L1 data cache refill - Read or Write" + }, + { + "PublicDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills", + "EventCode": "0xD701", + "EventName": "L2C_INST_REFILL", + "BriefDescription": "Level 2 cache refill from instruction-side miss, including IMMU refills" + }, + { + "PublicDescription": "Level 2 cache refill from data-side miss, including DMMU refills", + "EventCode": "0xD702", + "EventName": "L2C_DATA_REFILL", + "BriefDescription": "Level 2 cache refill from data-side miss, including DMMU refills" + }, + { + "PublicDescription": "Level 2 cache prefetcher, load prefetch requests generated", + "EventCode": "0xD703", + "EventName": "L2_PREFETCH_REQ", + "BriefDescription": "Level 2 cache prefetcher, load prefetch requests generated" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json new file mode 100644 index 000000000000..eb5a2208d260 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/core-imp-def.json @@ -0,0 +1,464 @@ +[ + { + "PublicDescription": "Level 2 prefetch requests, refilled to L2 cache", + "EventCode": "0x10A", + "EventName": "L2_PREFETCH_REFILL", + "BriefDescription": "Level 2 prefetch requests, refilled to L2 cache" + }, + { + "PublicDescription": "Level 2 prefetch requests, late", + "EventCode": "0x10B", + "EventName": "L2_PREFETCH_UPGRADE", + "BriefDescription": "Level 2 prefetch requests, late" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB", + "EventCode": "0x110", + "EventName": "BPU_HIT_BTB", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB", + "EventCode": "0x111", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor", + "EventCode": "0x112", + "EventName": "BPU_HIT_INDIRECT_PREDICTOR", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor", + "EventCode": "0x113", + "EventName": "BPU_HIT_RSB", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor" + }, + { + "PublicDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB", + "EventCode": "0x114", + "EventName": "BPU_UNCONDITIONAL_BRANCH_MISS_BTB", + "BriefDescription": "Predictable unconditional branch speculatively executed that did not hit any level of BTB" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted", + "EventCode": "0x115", + "EventName": "BPU_BRANCH_NO_HIT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict", + "EventCode": "0x116", + "EventName": "BPU_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed that hit any level of BTB that mispredict" + }, + { + "PublicDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict", + "EventCode": "0x117", + "EventName": "BPU_CONDITIONAL_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable conditional branch speculatively executed that hit any level of BTB that (direction) mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict", + "EventCode": "0x118", + "EventName": "BPU_INDIRECT_BRANCH_HIT_BTB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the indirect predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict", + "EventCode": "0x119", + "EventName": "BPU_HIT_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the return predictor that mispredict" + }, + { + "PublicDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict", + "EventCode": "0x11a", + "EventName": "BPU_MISS_RSB_AND_MISPREDICT", + "BriefDescription": "Predictable taken branch speculatively executed that hit any level of BTB that access the overflow/underflow return predictor that mispredict" + }, + { + "PublicDescription": "Predictable branch speculatively executed, unpredicted, that mispredict", + "EventCode": "0x11b", + "EventName": "BPU_NO_PREDICTION_MISPREDICT", + "BriefDescription": "Predictable branch speculatively executed, unpredicted, that mispredict" + }, + { + "PublicDescription": "Preditable branch update the BTB region buffer entry", + "EventCode": "0x11c", + "EventName": "BPU_BTB_UPDATE", + "BriefDescription": "Preditable branch update the BTB region buffer entry" + }, + { + "PublicDescription": "Count predict pipe stalls due to speculative return address predictor full", + "EventCode": "0x11d", + "EventName": "BPU_RSB_FULL_STALL", + "BriefDescription": "Count predict pipe stalls due to speculative return address predictor full" + }, + { + "PublicDescription": "Macro-ops speculatively decoded", + "EventCode": "0x11f", + "EventName": "ICF_INST_SPEC_DECODE", + "BriefDescription": "Macro-ops speculatively decoded" + }, + { + "PublicDescription": "Flushes", + "EventCode": "0x120", + "EventName": "GPC_FLUSH", + "BriefDescription": "Flushes" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "GPC_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + }, + { + "PublicDescription": "ETM extout bit 0", + "EventCode": "0x141", + "EventName": "MSC_ETM_EXTOUT0", + "BriefDescription": "ETM extout bit 0" + }, + { + "PublicDescription": "ETM extout bit 1", + "EventCode": "0x142", + "EventName": "MSC_ETM_EXTOUT1", + "BriefDescription": "ETM extout bit 1" + }, + { + "PublicDescription": "ETM extout bit 2", + "EventCode": "0x143", + "EventName": "MSC_ETM_EXTOUT2", + "BriefDescription": "ETM extout bit 2" + }, + { + "PublicDescription": "ETM extout bit 3", + "EventCode": "0x144", + "EventName": "MSC_ETM_EXTOUT3", + "BriefDescription": "ETM extout bit 3" + }, + { + "PublicDescription": "Bus request sn", + "EventCode": "0x156", + "EventName": "L2C_SNOOP", + "BriefDescription": "Bus request sn" + }, + { + "PublicDescription": "L2 TXDAT LCRD blocked", + "EventCode": "0x169", + "EventName": "L2C_DAT_CRD_STALL", + "BriefDescription": "L2 TXDAT LCRD blocked" + }, + { + "PublicDescription": "L2 TXRSP LCRD blocked", + "EventCode": "0x16a", + "EventName": "L2C_RSP_CRD_STALL", + "BriefDescription": "L2 TXRSP LCRD blocked" + }, + { + "PublicDescription": "L2 TXREQ LCRD blocked", + "EventCode": "0x16b", + "EventName": "L2C_REQ_CRD_STALL", + "BriefDescription": "L2 TXREQ LCRD blocked" + }, + { + "PublicDescription": "Early mispredict", + "EventCode": "0xD100", + "EventName": "ICF_EARLY_MIS_PRED", + "BriefDescription": "Early mispredict" + }, + { + "PublicDescription": "FEQ full cycles", + "EventCode": "0xD101", + "EventName": "ICF_FEQ_FULL", + "BriefDescription": "FEQ full cycles" + }, + { + "PublicDescription": "Instruction FIFO Full", + "EventCode": "0xD102", + "EventName": "ICF_INST_FIFO_FULL", + "BriefDescription": "Instruction FIFO Full" + }, + { + "PublicDescription": "L1I TLB miss", + "EventCode": "0xD103", + "EventName": "L1I_TLB_MISS", + "BriefDescription": "L1I TLB miss" + }, + { + "PublicDescription": "ICF sent 0 instructions to IDR this cycle", + "EventCode": "0xD104", + "EventName": "ICF_STALL", + "BriefDescription": "ICF sent 0 instructions to IDR this cycle" + }, + { + "PublicDescription": "PC FIFO Full", + "EventCode": "0xD105", + "EventName": "ICF_PC_FIFO_FULL", + "BriefDescription": "PC FIFO Full" + }, + { + "PublicDescription": "Stall due to BOB ID", + "EventCode": "0xD200", + "EventName": "IDR_STALL_BOB_ID", + "BriefDescription": "Stall due to BOB ID" + }, + { + "PublicDescription": "Dispatch stall due to LOB entries", + "EventCode": "0xD201", + "EventName": "IDR_STALL_LOB_ID", + "BriefDescription": "Dispatch stall due to LOB entries" + }, + { + "PublicDescription": "Dispatch stall due to SOB entries", + "EventCode": "0xD202", + "EventName": "IDR_STALL_SOB_ID", + "BriefDescription": "Dispatch stall due to SOB entries" + }, + { + "PublicDescription": "Dispatch stall due to IXU scheduler entries", + "EventCode": "0xD203", + "EventName": "IDR_STALL_IXU_SCHED", + "BriefDescription": "Dispatch stall due to IXU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to FSU scheduler entries", + "EventCode": "0xD204", + "EventName": "IDR_STALL_FSU_SCHED", + "BriefDescription": "Dispatch stall due to FSU scheduler entries" + }, + { + "PublicDescription": "Dispatch stall due to ROB entries", + "EventCode": "0xD205", + "EventName": "IDR_STALL_ROB_ID", + "BriefDescription": "Dispatch stall due to ROB entries" + }, + { + "PublicDescription": "Dispatch stall due to flush", + "EventCode": "0xD206", + "EventName": "IDR_STALL_FLUSH", + "BriefDescription": "Dispatch stall due to flush" + }, + { + "PublicDescription": "Dispatch stall due to WFI", + "EventCode": "0xD207", + "EventName": "IDR_STALL_WFI", + "BriefDescription": "Dispatch stall due to WFI" + }, + { + "PublicDescription": "Number of SWOB drains triggered by timeout", + "EventCode": "0xD208", + "EventName": "IDR_STALL_SWOB_TIMEOUT", + "BriefDescription": "Number of SWOB drains triggered by timeout" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain", + "EventCode": "0xD209", + "EventName": "IDR_STALL_SWOB_RAW", + "BriefDescription": "Number of SWOB drains triggered by system register or special-purpose register read-after-write or specific special-purpose register writes that cause SWOB drain" + }, + { + "PublicDescription": "Number of SWOB drains triggered by system register write when SWOB full", + "EventCode": "0xD20A", + "EventName": "IDR_STALL_SWOB_FULL", + "BriefDescription": "Number of SWOB drains triggered by system register write when SWOB full" + }, + { + "PublicDescription": "Dispatch stall due to L1 instruction cache miss", + "EventCode": "0xD20B", + "EventName": "STALL_FRONTEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 instruction cache miss" + }, + { + "PublicDescription": "Dispatch stall due to L1 data cache miss", + "EventCode": "0xD20D", + "EventName": "STALL_BACKEND_CACHE", + "BriefDescription": "Dispatch stall due to L1 data cache miss" + }, + { + "PublicDescription": "Dispatch stall due to lack of any core resource", + "EventCode": "0xD20F", + "EventName": "STALL_BACKEND_RESOURCE", + "BriefDescription": "Dispatch stall due to lack of any core resource" + }, + { + "PublicDescription": "Instructions issued by the scheduler", + "EventCode": "0xD300", + "EventName": "IXU_NUM_UOPS_ISSUED", + "BriefDescription": "Instructions issued by the scheduler" + }, + { + "PublicDescription": "Any uop issued was canceled for any reason", + "EventCode": "0xD301", + "EventName": "IXU_ISSUE_CANCEL", + "BriefDescription": "Any uop issued was canceled for any reason" + }, + { + "PublicDescription": "A load wakeup to the scheduler has been canceled", + "EventCode": "0xD302", + "EventName": "IXU_LOAD_CANCEL", + "BriefDescription": "A load wakeup to the scheduler has been canceled" + }, + { + "PublicDescription": "The scheduler had to cancel one slow Uop due to resource conflict", + "EventCode": "0xD303", + "EventName": "IXU_SLOW_CANCEL", + "BriefDescription": "The scheduler had to cancel one slow Uop due to resource conflict" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA", + "EventCode": "0xD304", + "EventName": "IXU_IXA_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 0", + "EventCode": "0xD305", + "EventName": "IXU_IXA_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXA Par 1", + "EventCode": "0xD306", + "EventName": "IXU_IXA_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXA Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB", + "EventCode": "0xD307", + "EventName": "IXU_IXB_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 0", + "EventCode": "0xD308", + "EventName": "IXU_IXB_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXB Par 1", + "EventCode": "0xD309", + "EventName": "IXU_IXB_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXB Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC", + "EventCode": "0xD30A", + "EventName": "IXU_IXC_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 0", + "EventCode": "0xD30B", + "EventName": "IXU_IXC_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXC Par 1", + "EventCode": "0xD30C", + "EventName": "IXU_IXC_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXC Par 1" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD", + "EventCode": "0xD30D", + "EventName": "IXU_IXD_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 0", + "EventCode": "0xD30E", + "EventName": "IXU_IXD_PAR0_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 0" + }, + { + "PublicDescription": "Uops issued by the scheduler on IXD Par 1", + "EventCode": "0xD30F", + "EventName": "IXU_IXD_PAR1_ISSUED", + "BriefDescription": "Uops issued by the scheduler on IXD Par 1" + }, + { + "PublicDescription": "Uops issued by the FSU scheduler", + "EventCode": "0xD400", + "EventName": "FSU_ISSUED", + "BriefDescription": "Uops issued by the FSU scheduler" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSX", + "EventCode": "0xD401", + "EventName": "FSU_FSX_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSX" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSY", + "EventCode": "0xD402", + "EventName": "FSU_FSY_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSY" + }, + { + "PublicDescription": "Uops issued by the scheduler on FSZ", + "EventCode": "0xD403", + "EventName": "FSU_FSZ_ISSUED", + "BriefDescription": "Uops issued by the scheduler on FSZ" + }, + { + "PublicDescription": "Uops canceled (load cancels)", + "EventCode": "0xD404", + "EventName": "FSU_CANCEL", + "BriefDescription": "Uops canceled (load cancels)" + }, + { + "PublicDescription": "Count scheduler stalls due to divide/sqrt", + "EventCode": "0xD405", + "EventName": "FSU_DIV_SQRT_STALL", + "BriefDescription": "Count scheduler stalls due to divide/sqrt" + }, + { + "PublicDescription": "Number of SWOB drains", + "EventCode": "0xD500", + "EventName": "GPC_SWOB_DRAIN", + "BriefDescription": "Number of SWOB drains" + }, + { + "PublicDescription": "GPC detected a Breakpoint instruction match", + "EventCode": "0xD501", + "EventName": "BREAKPOINT_MATCH", + "BriefDescription": "GPC detected a Breakpoint instruction match" + }, + { + "PublicDescription": "Core progress monitor triggered", + "EventCode": "0xd502", + "EventName": "GPC_CPM_TRIGGER", + "BriefDescription": "Core progress monitor triggered" + }, + { + "PublicDescription": "Fill buffer full", + "EventCode": "0xD601", + "EventName": "OFB_FULL", + "BriefDescription": "Fill buffer full" + }, + { + "PublicDescription": "Load satisified from store forwarded data", + "EventCode": "0xD605", + "EventName": "LD_FROM_ST_FWD", + "BriefDescription": "Load satisified from store forwarded data" + }, + { + "PublicDescription": "Store retirement pipe stall", + "EventCode": "0xD60C", + "EventName": "LSU_ST_RETIRE_STALL", + "BriefDescription": "Store retirement pipe stall" + }, + { + "PublicDescription": "LSU detected a Watchpoint data match", + "EventCode": "0xD60D", + "EventName": "WATCHPOINT_MATCH", + "BriefDescription": "LSU detected a Watchpoint data match" + }, + { + "PublicDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature", + "EventCode": "0xda00", + "EventName": "MSC_ETM_COMMIT_STALL", + "BriefDescription": "Counts cycles that MSC is telling GPC to stall commit due to ETM ISTALL feature" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json new file mode 100644 index 000000000000..bd59ba7b74e4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + }, + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "EXC_SMC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json new file mode 100644 index 000000000000..a6a20f541e33 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/instruction.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "ST_RETIRED" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_RETIRED" + }, + { + "ArchStdEvent": "BR_RETURN_RETIRED" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "PC_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Scalar", + "EventCode": "0xd210", + "EventName": "ASE_SCALAR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Scalar" + }, + { + "PublicDescription": "Operation speculatively executed - ASE Vector", + "EventCode": "0xd211", + "EventName": "ASE_VECTOR_SPEC", + "BriefDescription": "Operation speculatively executed - ASE Vector" + }, + { + "PublicDescription": "Barrier speculatively executed, CSDB", + "EventCode": "0x7f", + "EventName": "CSDB_SPEC", + "BriefDescription": "Barrier speculatively executed, CSDB" + }, + { + "PublicDescription": "Prefetch sent to L2.", + "EventCode": "0xd106", + "EventName": "ICF_PREFETCH_DISPATCH", + "BriefDescription": "Prefetch sent to L2." + }, + { + "PublicDescription": "Prefetch response received but was dropped since we don't support inflight upgrades.", + "EventCode": "0xd107", + "EventName": "ICF_PREFETCH_DROPPED_NO_UPGRADE", + "BriefDescription": "Prefetch response received but was dropped since we don't support inflight upgrades." + }, + { + "PublicDescription": "Prefetch request missed TLB.", + "EventCode": "0xd108", + "EventName": "ICF_PREFETCH_DROPPED_TLB_MISS", + "BriefDescription": "Prefetch request missed TLB." + }, + { + "PublicDescription": "Prefetch request dropped since duplicate was found in TLB.", + "EventCode": "0xd109", + "EventName": "ICF_PREFETCH_DROPPED_DUPLICATE", + "BriefDescription": "Prefetch request dropped since duplicate was found in TLB." + }, + { + "PublicDescription": "Prefetch request dropped since it was found in cache.", + "EventCode": "0xd10a", + "EventName": "ICF_PREFETCH_DROPPED_CACHE_HIT", + "BriefDescription": "Prefetch request dropped since it was found in cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json new file mode 100644 index 000000000000..7ecffb989ae0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/intrinsic.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json new file mode 100644 index 000000000000..a211d94aacde --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/memory.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "LD_RETIRED" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + }, + { + "PublicDescription": "Flushes due to memory hazards", + "EventCode": "0x121", + "EventName": "BPU_FLUSH_MEM_FAULT", + "BriefDescription": "Flushes due to memory hazards" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json new file mode 100644 index 000000000000..c5d1d22bd034 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -0,0 +1,442 @@ +[ + { + "MetricName": "branch_miss_pred_rate", + "MetricExpr": "BR_MIS_PRED / BR_PRED", + "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch", + "MetricGroup": "branch", + "ScaleUnit": "100%" + }, + { + "MetricName": "bus_utilization", + "MetricExpr": "BUS_ACCESS / (BUS_CYCLES * 1)", + "BriefDescription": "Core-to-uncore bus utilization", + "MetricGroup": "Bus", + "ScaleUnit": "100percent of bus cycles" + }, + { + "MetricName": "l1d_cache_miss_ratio", + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.", + "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_miss_ratio", + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", + "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.", + "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "Miss_Ratio;l1d_cache_read_miss", + "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD", + "BriefDescription": "L1D cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l2_cache_miss_ratio", + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", + "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.", + "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l1i_cache_read_miss_rate", + "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE", + "BriefDescription": "L1I cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache access" + }, + { + "MetricName": "l2d_cache_read_miss_rate", + "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD", + "BriefDescription": "L2 cache read miss rate", + "MetricGroup": "Cache", + "ScaleUnit": "1per cache read access" + }, + { + "MetricName": "l1d_cache_miss_mpki", + "MetricExpr": "(L1D_CACHE_LMISS_RD * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (data)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "l1i_cache_miss_mpki", + "MetricExpr": "(L1I_CACHE_LMISS * 1e3) / INST_RETIRED", + "BriefDescription": "Misses per thousand instructions (instruction)", + "MetricGroup": "Cache", + "ScaleUnit": "1MPKI" + }, + { + "MetricName": "simd_percentage", + "MetricExpr": "ASE_SPEC / INST_SPEC", + "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "crypto_percentage", + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", + "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "gflops", + "MetricExpr": "VFP_SPEC / (duration_time * 1e9)", + "BriefDescription": "Giga-floating point operations per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "integer_dp_percentage", + "MetricExpr": "DP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "ipc", + "MetricExpr": "INST_RETIRED / CPU_CYCLES", + "BriefDescription": "This metric measures the number of instructions retired per cycle.", + "MetricGroup": "General", + "ScaleUnit": "1per cycle" + }, + { + "MetricName": "load_percentage", + "MetricExpr": "LD_SPEC / INST_SPEC", + "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "load_store_spec_rate", + "MetricExpr": "LDST_SPEC / INST_SPEC", + "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_mips", + "MetricExpr": "INST_RETIRED / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "InstructionMix" + }, + { + "MetricName": "spec_utilization_mips", + "MetricExpr": "INST_SPEC / (duration_time * 1e6)", + "BriefDescription": "Millions of instructions per second", + "MetricGroup": "PEutilization" + }, + { + "MetricName": "pc_write_spec_rate", + "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "store_percentage", + "MetricExpr": "ST_SPEC / INST_SPEC", + "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "scalar_fp_percentage", + "MetricExpr": "VFP_SPEC / INST_SPEC", + "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.", + "MetricGroup": "Operation_Mix", + "ScaleUnit": "100percent of operations" + }, + { + "MetricName": "retired_rate", + "MetricExpr": "OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted", + "MetricExpr": "1 - (OP_RETIRED / (CPU_CYCLES * #slots))", + "BriefDescription": "Of all the micro-operations issued, what proportion are lost", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "wasted_rate", + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)", + "MetricGroup": "General", + "ScaleUnit": "100%" + }, + { + "MetricName": "stall_backend_cache_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_resource_rate", + "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_backend_tlb_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_cache_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_frontend_tlb_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", + "MetricGroup": "Stall", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "dtlb_walk_ratio", + "MetricExpr": "DTLB_WALK / L1D_TLB", + "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.", + "MetricGroup": "Miss_Ratio;DTLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "MetricName": "itlb_walk_ratio", + "MetricExpr": "ITLB_WALK / L1I_TLB", + "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.", + "MetricGroup": "Miss_Ratio;ITLB_Effectiveness", + "ScaleUnit": "1per TLB access" + }, + { + "ArchStdEvent": "backend_bound" + }, + { + "ArchStdEvent": "frontend_bound", + "MetricExpr": "100 - (retired_fraction + slots_lost_misspeculation_fraction + backend_bound)" + }, + { + "MetricName": "slots_lost_misspeculation_fraction", + "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots lost due to misspeculation", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "retired_fraction", + "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of slots retiring, useful work", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "backend_core", + "MetricExpr": "(backend_bound / 100) - backend_memory", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "backend_memory", + "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100%" + }, + { + "MetricName": "branch_mispredict", + "MetricExpr": "(BR_MIS_PRED_RETIRED / GPC_FLUSH) * slots_lost_misspeculation_fraction", + "BriefDescription": "Fraction of slots lost due to branch misprediciton", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_bandwidth", + "MetricExpr": "frontend_bound - frontend_latency", + "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "frontend_latency", + "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - ((frontend_bound / 100) * CPU_CYCLES * #slots)) / #slots)) / CPU_CYCLES", + "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch", + "MetricGroup": "TopdownL2", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "other_miss_pred", + "MetricExpr": "slots_lost_misspeculation_fraction - branch_mispredict", + "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "pipe_utilization", + "MetricExpr": "100 * ((IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6))", + "BriefDescription": "Fraction of execute slots utilized", + "MetricGroup": "TopdownL2", + "ScaleUnit": "1percent of slots" + }, + { + "MetricName": "d_cache_l2_miss_rate", + "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_cache_miss_rate", + "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "d_tlb_miss_rate", + "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "fsu_pipe_utilization", + "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)", + "BriefDescription": "Fraction of FSU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_cache_miss_rate", + "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "i_tlb_miss_rate", + "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "ixu_pipe_utilization", + "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES * #slots)", + "BriefDescription": "Fraction of IXU execute slots utilized", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_recovery_rate", + "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery", + "MetricGroup": "TopdownL3", + "ScaleUnit": "100percent of slots" + }, + { + "MetricName": "stall_fsu_sched_rate", + "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_ixu_sched_rate", + "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_lob_id_rate", + "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_rob_id_rate", + "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "stall_sob_id_rate", + "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES", + "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full", + "MetricGroup": "TopdownL4", + "ScaleUnit": "100percent of cycles" + }, + { + "MetricName": "l1d_cache_access_demand", + "MetricExpr": "L1D_CACHE_RW / L1D_CACHE", + "BriefDescription": "L1D cache access - demand", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_access_prefetces", + "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache access - prefetch", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses", + "MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE", + "BriefDescription": "L1D cache demand misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_read", + "MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - read", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_demand_misses_write", + "MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE", + "BriefDescription": "L1D cache demand misses - write", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "l1d_cache_prefetch_misses", + "MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE", + "BriefDescription": "L1D cache prefetch misses", + "MetricGroup": "Cache", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_scalar_mix", + "MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + }, + { + "MetricName": "ase_vector_mix", + "MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC", + "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations", + "MetricGroup": "Instructions", + "ScaleUnit": "100percent of cache acceses" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json new file mode 100644 index 000000000000..66d83b680651 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/mmu.json @@ -0,0 +1,170 @@ +[ + { + "PublicDescription": "Level 2 data translation buffer allocation", + "EventCode": "0xD800", + "EventName": "MMU_D_OTB_ALLOC", + "BriefDescription": "Level 2 data translation buffer allocation" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd801", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd802", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd803", + "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescription": "Data TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd804", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd805", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Data TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd806", + "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Data TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescrition": "Data-side S1 page walk cache lookup", + "EventCode": "0xd807", + "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S1 page walk cache refill", + "EventCode": "0xd808", + "EventName": "MMU_D_S1_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S1 page walk cache refill" + }, + { + "PublicDescrition": "Data-side S2 page walk cache lookup", + "EventCode": "0xd809", + "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Data-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Data-side S2 page walk cache refill", + "EventCode": "0xd80a", + "EventName": "MMU_D_S2_WALK_CACHE_REFILL", + "BriefDescription": "Data-side S2 page walk cache refill" + }, + { + "PublicDescription": "Data-side S1 table walk fault", + "EventCode": "0xD80B", + "EventName": "MMU_D_S1_WALK_FAULT", + "BriefDescription": "Data-side S1 table walk fault" + }, + { + "PublicDescription": "Data-side S2 table walk fault", + "EventCode": "0xD80C", + "EventName": "MMU_D_S2_WALK_FAULT", + "BriefDescription": "Data-side S2 table walk fault" + }, + { + "PublicDescription": "Data-side table walk steps or descriptor fetches", + "EventCode": "0xD80D", + "EventName": "MMU_D_WALK_STEPS", + "BriefDescription": "Data-side table walk steps or descriptor fetches" + }, + { + "PublicDescription": "Level 2 instruction translation buffer allocation", + "EventCode": "0xD900", + "EventName": "MMU_I_OTB_ALLOC", + "BriefDescription": "Level 2 instruction translation buffer allocation" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L2 walk cache entry", + "EventCode": "0xd901", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L1 walk cache entry", + "EventCode": "0xd902", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S1L0 walk cache entry", + "EventCode": "0xd903", + "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S1L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L2 walk cache entry", + "EventCode": "0xd904", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L2 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L1 walk cache entry", + "EventCode": "0xd905", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L1 walk cache entry" + }, + { + "PublicDescrition": "Instruction TLB translation cache hit on S2L0 walk cache entry", + "EventCode": "0xd906", + "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK", + "BriefDescription": "Instruction TLB translation cache hit on S2L0 walk cache entry" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache lookup", + "EventCode": "0xd907", + "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S1 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S1 page walk cache refill", + "EventCode": "0xd908", + "EventName": "MMU_I_S1_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S1 page walk cache refill" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache lookup", + "EventCode": "0xd909", + "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP", + "BriefDescription": "Instruction-side S2 page walk cache lookup" + }, + { + "PublicDescrition": "Instruction-side S2 page walk cache refill", + "EventCode": "0xd90a", + "EventName": "MMU_I_S2_WALK_CACHE_REFILL", + "BriefDescription": "Instruction-side S2 page walk cache refill" + }, + { + "PublicDescription": "Instruction-side S1 table walk fault", + "EventCode": "0xD90B", + "EventName": "MMU_I_S1_WALK_FAULT", + "BriefDescription": "Instruction-side S1 table walk fault" + }, + { + "PublicDescription": "Instruction-side S2 table walk fault", + "EventCode": "0xD90C", + "EventName": "MMU_I_S2_WALK_FAULT", + "BriefDescription": "Instruction-side S2 table walk fault" + }, + { + "PublicDescription": "Instruction-side table walk steps or descriptor fetches", + "EventCode": "0xD90D", + "EventName": "MMU_I_WALK_STEPS", + "BriefDescription": "Instruction-side table walk steps or descriptor fetches" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json new file mode 100644 index 000000000000..2fb2d1f183fc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/pipeline.json @@ -0,0 +1,41 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND", + "Errata": "Errata AC03_CPU_29", + "BriefDescription": "Impacted by errata, use metrics instead -" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + }, + { + "PublicDescription": "Frontend stall cycles, TLB", + "EventCode": "0x815c", + "EventName": "STALL_FRONTEND_TLB", + "BriefDescription": "Frontend stall cycles, TLB" + }, + { + "PublicDescription": "Backend stall cycles, TLB", + "EventCode": "0x8167", + "EventName": "STALL_BACKEND_TLB", + "BriefDescription": "Backend stall cycles, TLB" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 5b58db5032c1..f4d1ca4d1493 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -42,3 +42,4 @@ 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core 0x00000000c00fac30,v1,ampere/ampereone,core +0x00000000c00fac40,v1,ampere/ampereonex,core -- cgit v1.2.3 From b809fc656e763296f227b9b31e8f225e5977a8af Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:25 -0800 Subject: perf build: Shellcheck support for OUTPUT directory Migrate Makefile.tests to Build so that variables like rule_mkdir are defined via Makefile.build (needed so the output directory can be created). This requires SHELLCHECK being exported and the clean rule tweaking to remove the files in find. Change find "-perm -o=x" as it was failing on my Debian based Linux kernel tree, switch to using "-executable". Adding a filename prefix of "." to the shellcheck log files is a pain and error prone in make, remove this prefix and just add the shellcheck log files to .gitignore. Fix the command echo so that running the test is displayed. Fixes: 1638b11ef8156c85 ("perf tools: Add perf binary dependent rule for shellcheck log in Makefile.perf") Reviewed-by: Athira Jajeev Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231129213428.2227448-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 3 +++ tools/perf/Makefile.perf | 30 ++++++++++-------------------- tools/perf/tests/Build | 14 ++++++++++++++ tools/perf/tests/Makefile.tests | 22 ---------------------- 4 files changed, 27 insertions(+), 42 deletions(-) delete mode 100644 tools/perf/tests/Makefile.tests (limited to 'tools') diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index ee5c14f3b8b1..f5b81d439387 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,6 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log +tests/shell/*.shellcheck_log +tests/shell/coresight/*.shellcheck_log +tests/shell/lib/*.shellcheck_log feature/ libapi/ libbpf/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 824cbc0af7d7..1ab2a908f240 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -229,8 +229,15 @@ else force_fixdep := $(config) endif +# Runs shellcheck on perf test shell scripts +ifeq ($(NO_SHELLCHECK),1) + SHELLCHECK := +else + SHELLCHECK := $(shell which shellcheck 2> /dev/null) +endif + export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR HOSTCFLAGS +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS SHELLCHECK include $(srctree)/tools/build/Makefile.include @@ -673,23 +680,7 @@ $(PERF_IN): prepare FORCE $(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events -# Runs shellcheck on perf test shell scripts - -SHELLCHECK := $(shell which shellcheck 2> /dev/null) - -ifeq ($(NO_SHELLCHECK),1) -SHELLCHECK := -endif - -ifneq ($(SHELLCHECK),) -SHELLCHECK_TEST: FORCE prepare - $(Q)$(MAKE) -f $(srctree)/tools/perf/tests/Makefile.tests -else -SHELLCHECK_TEST: - @: -endif - -$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) SHELLCHECK_TEST +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ @@ -1152,9 +1143,8 @@ bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean - $(Q)$(MAKE) -f $(srctree)/tools/perf/tests/Makefile.tests clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) - $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete -o -name '*.shellcheck_log' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 2b45ffa462a6..53ba9c3e20e0 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -77,3 +77,17 @@ CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUI CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls perf-y += workloads/ + +ifdef SHELLCHECK + SHELL_TESTS := $(shell find tests/shell -executable -type f -name '*.sh') + TEST_LOGS := $(SHELL_TESTS:tests/shell/%=shell/%.shellcheck_log) +else + SHELL_TESTS := + TEST_LOGS := +endif + +$(OUTPUT)%.shellcheck_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) + +perf-y += $(TEST_LOGS) diff --git a/tools/perf/tests/Makefile.tests b/tools/perf/tests/Makefile.tests deleted file mode 100644 index fdaca5f7a946..000000000000 --- a/tools/perf/tests/Makefile.tests +++ /dev/null @@ -1,22 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Athira Rajeev , 2023 - -PROGS := $(shell find tests/shell -perm -o=x -type f -name '*.sh') -FILE_NAME := $(notdir $(PROGS)) -FILE_NAME := $(FILE_NAME:%=.%) -LOGS := $(join $(dir $(PROGS)),$(FILE_NAME)) -LOGS := $(LOGS:%=%.shellcheck_log) - -.PHONY: all -all: SHELLCHECK_RUN - @: - -SHELLCHECK_RUN: $(LOGS) - -.%.shellcheck_log: % - $(call rule_mkdir) - $(Q)$(call frecho-cmd,test)@shellcheck -S warning "$<" > $@ || (cat $@ && rm $@ && false) - -clean: - $(eval log_files := $(shell find . -name '.*.shellcheck_log')) - @rm -rf $(log_files) -- cgit v1.2.3 From 8226e4a3b35f525d11934484ee5979399ff8b7dc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:27 -0800 Subject: perf test: Use common python setup library Avoid replicated logic by having a common library to set the PYTHON environment variable. Reviewed-by: Athira Jajeev Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231129213428.2227448-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/setup_python.sh | 16 ++++++++++++++++ tools/perf/tests/shell/stat+json_output.sh | 16 +++------------- tools/perf/tests/shell/stat_metrics_values.sh | 14 ++++---------- tools/perf/tests/shell/test_perf_data_converter_json.sh | 13 +++---------- 4 files changed, 26 insertions(+), 33 deletions(-) create mode 100644 tools/perf/tests/shell/lib/setup_python.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/lib/setup_python.sh b/tools/perf/tests/shell/lib/setup_python.sh new file mode 100644 index 000000000000..c2fce1793538 --- /dev/null +++ b/tools/perf/tests/shell/lib/setup_python.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +if [ "x$PYTHON" = "x" ] +then + python3 --version >/dev/null 2>&1 && PYTHON=python3 +fi +if [ "x$PYTHON" = "x" ] +then + python --version >/dev/null 2>&1 && PYTHON=python +fi +if [ "x$PYTHON" = "x" ] +then + echo Skipping test, python not detected please set environment variable PYTHON. + exit 2 +fi diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh index 196e22672c50..3bc900533a5d 100755 --- a/tools/perf/tests/shell/stat+json_output.sh +++ b/tools/perf/tests/shell/stat+json_output.sh @@ -8,20 +8,10 @@ set -e skip_test=0 +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - elif which python > /dev/null - then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi stat_output=$(mktemp /tmp/__perf_test.stat_output.json.XXXXX) diff --git a/tools/perf/tests/shell/stat_metrics_values.sh b/tools/perf/tests/shell/stat_metrics_values.sh index ad94c936de7e..7ca172599aa6 100755 --- a/tools/perf/tests/shell/stat_metrics_values.sh +++ b/tools/perf/tests/shell/stat_metrics_values.sh @@ -1,16 +1,10 @@ #!/bin/bash # perf metrics value validation # SPDX-License-Identifier: GPL-2.0 -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - else - echo Skipping test, python3 not detected please set environment variable PYTHON. - exit 2 - fi -fi + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } diff --git a/tools/perf/tests/shell/test_perf_data_converter_json.sh b/tools/perf/tests/shell/test_perf_data_converter_json.sh index 6ded58f98f55..c4f1b59d116f 100755 --- a/tools/perf/tests/shell/test_perf_data_converter_json.sh +++ b/tools/perf/tests/shell/test_perf_data_converter_json.sh @@ -6,16 +6,9 @@ set -e err=0 -if [ "$PYTHON" = "" ] ; then - if which python3 > /dev/null ; then - PYTHON=python3 - elif which python > /dev/null ; then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) result=$(mktemp /tmp/__perf_test.output.json.XXXXX) -- cgit v1.2.3 From 7d723ef83b8070ab2304df22ed952dc627385406 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:28 -0800 Subject: perf test: Add basic 'perf list --json" test Test that JSON output produces valid JSON. Reviewed-by: Athira Jajeev Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231129213428.2227448-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/list.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100755 tools/perf/tests/shell/list.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh new file mode 100755 index 000000000000..22b004f2b23e --- /dev/null +++ b/tools/perf/tests/shell/list.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# perf list tests +# SPDX-License-Identifier: GPL-2.0 + +set -e +err=0 + +shelldir=$(dirname "$0") +# shellcheck source=lib/setup_python.sh +. "${shelldir}"/lib/setup_python.sh + +test_list_json() { + echo "Json output test" + perf list -j | $PYTHON -m json.tool + echo "Json output test [Success]" +} + +test_list_json +exit $err -- cgit v1.2.3 From 9eef41014fe01287dae79fe208b9b433b13040bb Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 23 Nov 2023 21:31:10 +0530 Subject: perf vendor events powerpc: Update datasource event name to fix duplicate events Running "perf list" on powerpc fails with segfault as below: $ ./perf list Segmentation fault (core dumped) $ This happens because of duplicate events in the JSON list. The powerpc JSON event list contains some event with same event name, but different event code. They are: - PM_INST_FROM_L3MISS (Present in datasource and frontend) - PM_MRK_DATA_FROM_L2MISS (Present in datasource and marked) - PM_MRK_INST_FROM_L3MISS (Present in datasource and marked) - PM_MRK_DATA_FROM_L3MISS (Present in datasource and marked) pmu_events_table__num_events() uses the value from table_pmu->num_entries which includes duplicate events as well. This causes issue during "perf list" and results in a segmentation fault. Since both event codes are valid, append _DSRC to the Data Source events (datasource.json), so that they would have a unique name. Also add PM_DATA_FROM_L2MISS_DSRC and PM_DATA_FROM_L3MISS_DSRC events. With the fix, 'perf list' works as expected. Fixes: fc143580753348c6 ("perf vendor events power10: Update JSON/events") Signed-off-by: Athira Jajeev Tested-by: Disha Goel Cc: Adrian Hunter Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231123160110.94090-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power10/datasource.json | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json index 6b0356f2d301..0eeaaf1a95b8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/datasource.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/datasource.json @@ -99,6 +99,11 @@ "EventName": "PM_INST_FROM_L2MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source beyond the local core's L2 due to a demand miss." }, + { + "EventCode": "0x0003C0000000C040", + "EventName": "PM_DATA_FROM_L2MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss." + }, { "EventCode": "0x000380000010C040", "EventName": "PM_INST_FROM_L2MISS_ALL", @@ -161,9 +166,14 @@ }, { "EventCode": "0x000780000000C040", - "EventName": "PM_INST_FROM_L3MISS", + "EventName": "PM_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss." }, + { + "EventCode": "0x0007C0000000C040", + "EventName": "PM_DATA_FROM_L3MISS_DSRC", + "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss." + }, { "EventCode": "0x000780000010C040", "EventName": "PM_INST_FROM_L3MISS_ALL", @@ -981,7 +991,7 @@ }, { "EventCode": "0x0003C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L2MISS", + "EventName": "PM_MRK_DATA_FROM_L2MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction." }, { @@ -1046,12 +1056,12 @@ }, { "EventCode": "0x000780000000C142", - "EventName": "PM_MRK_INST_FROM_L3MISS", + "EventName": "PM_MRK_INST_FROM_L3MISS_DSRC", "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { "EventCode": "0x0007C0000000C142", - "EventName": "PM_MRK_DATA_FROM_L3MISS", + "EventName": "PM_MRK_DATA_FROM_L3MISS_DSRC", "BriefDescription": "The processor's L1 data cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction." }, { -- cgit v1.2.3 From a4320085a6c694326dd8db46f563d52d1a826f07 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 28 Nov 2023 12:39:40 -0800 Subject: perf mem: Fix error on hybrid related to availability of mem event in a PMU The below error can be triggered on a hybrid machine. $ perf mem record -t load sleep 1 event syntax error: 'breakpoint/mem-loads,ldlat=30/P' \___ Bad event or PMU Unable to find PMU or event on a PMU of 'breakpoint' In the perf_mem_events__record_args(), the current perf never checks the availability of a mem event on a given PMU. All the PMUs will be added to the perf mem event list. Perf errors out for the unsupported PMU. Extend perf_mem_event__supported() and take a PMU into account. Check the mem event for each PMU before adding it to the perf mem event list. Optimize the perf_mem_events__init() a little bit. The function is to check whether the mem events are supported in the system. It doesn't need to scan all PMUs. Just return with the first supported PMU is good enough. Fixes: 5752c20f3787c9bc ("perf mem: Scan all PMUs instead of just core ones") Reported-by: Ammy Yi Signed-off-by: Kan Liang Tested-by: Ammy Yi Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20231128203940.3964287-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 954b235e12e5..3a2e3687878c 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -100,11 +100,14 @@ int perf_mem_events__parse(const char *str) return -1; } -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu, + struct perf_mem_event *e) { + char sysfs_name[100]; char path[PATH_MAX]; struct stat st; + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); return !stat(path, &st); } @@ -120,7 +123,6 @@ int perf_mem_events__init(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; /* @@ -136,12 +138,12 @@ int perf_mem_events__init(void) * of core PMU. */ while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); + e->supported |= perf_mem_event__supported(mnt, pmu, e); + if (e->supported) { + found = true; + break; + } } - - if (e->supported) - found = true; } return found ? 0 : -ENOENT; @@ -167,13 +169,10 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int idx) { const char *mnt = sysfs__mount(); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, - pmu->name); - if (!perf_mem_event__supported(mnt, sysfs_name)) { + if (!perf_mem_event__supported(mnt, pmu, e)) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(idx, pmu->name)); } @@ -183,6 +182,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **rec_tmp, int *tmp_nr) { + const char *mnt = sysfs__mount(); int i = *argv_nr, k = 0; struct perf_mem_event *e; @@ -211,6 +211,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, while ((pmu = perf_pmus__scan(pmu)) != NULL) { const char *s = perf_mem_events__name(j, pmu->name); + if (!perf_mem_event__supported(mnt, pmu, e)) + continue; + rec_argv[i++] = "-e"; if (s) { char *copy = strdup(s); -- cgit v1.2.3 From 144081ef78c36e255c08b74da86ef68e6cf0a221 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 20 Nov 2023 11:04:08 -0800 Subject: perf test: Add basic 'perf diff' test There are some old bug reports on perf diff crashing: https://rhaas.blogspot.com/2012/06/perf-good-bad-ugly.html Happening across them I was prompted to add two very basic tests that will give some 'perf diff' coverage. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231120190408.281826-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/diff.sh | 101 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 tools/perf/tests/shell/diff.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh new file mode 100755 index 000000000000..213185763688 --- /dev/null +++ b/tools/perf/tests/shell/diff.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# perf diff tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog="perf test -w thloop" +testsym="test_loop" + +cleanup() { + rm -rf "${perfdata1}" + rm -rf "${perfdata1}".old + rm -rf "${perfdata2}" + rm -rf "${perfdata2}".old + rm -rf "${perfdata3}" + rm -rf "${perfdata3}".old + + trap - EXIT TERM INT +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +make_data() { + file="$1" + if ! perf record -o "${file}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" + echo 1 + return + fi + if ! perf report -i "${file}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" + echo 1 + return + fi + echo 0 +} + +test_two_files() { + echo "Basic two file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + + if ! perf diff "${perfdata1}" "${perfdata2}" | grep -q "${testsym}" + then + echo "Basic two file diff test [Failed diff]" + err=1 + return + fi + echo "Basic two file diff test [Success]" +} + +test_three_files() { + echo "Basic three file diff test" + err=$(make_data "${perfdata1}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata2}") + if [ $err != 0 ] + then + return + fi + err=$(make_data "${perfdata3}") + if [ $err != 0 ] + then + return + fi + + if ! perf diff "${perfdata1}" "${perfdata2}" "${perfdata3}" | grep -q "${testsym}" + then + echo "Basic three file diff test [Failed diff]" + err=1 + return + fi + echo "Basic three file diff test [Success]" +} + +test_two_files +test_three_files + +cleanup +exit $err -- cgit v1.2.3 From 018b04248543f49d677011d4615f243a39a155a8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Jun 2023 09:00:29 +0100 Subject: perf bench sched-seccomp-notify: Fix spelling mistake "synchronious" -> "synchronous" There is a spelling mistake in an option description. Fix it. Signed-off-by: Colin Ian King Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel-janitors@vger.kernel.org Link: https://lore.kernel.org/r/20230630080029.15614-1-colin.i.king@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-seccomp-notify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c index a01c40131493..269c1f4a6852 100644 --- a/tools/perf/bench/sched-seccomp-notify.c +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -32,7 +32,7 @@ static bool sync_mode; static const struct option options[] = { OPT_U64('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('s', "sync-mode", &sync_mode, - "Enable the synchronious mode for seccomp notifications"), + "Enable the synchronous mode for seccomp notifications"), OPT_END() }; -- cgit v1.2.3 From b1693747487442984050eb0f462b83a3a8307525 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 13:34:26 -0800 Subject: perf list: Fix JSON segfault by setting the used skip_duplicate_pmus callback Json output didn't set the skip_duplicate_pmus callback yielding a segfault. Fixes: cd4e1efbbc40 ("perf pmus: Skip duplicate PMUs and don't print list suffix by default") Signed-off-by: Ian Rogers Cc: James Clark Cc: Kan Liang Cc: Athira Rajeev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231129213428.2227448-2-irogers@google.com [namhyung: updated subject line according to Arnaldo] Signed-off-by: Namhyung Kim --- tools/perf/builtin-list.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index a343823c8ddf..61c2c96cc070 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -434,6 +434,11 @@ static void json_print_metric(void *ps __maybe_unused, const char *group, strbuf_release(&buf); } +static bool json_skip_duplicate_pmus(void *ps __maybe_unused) +{ + return false; +} + static bool default_skip_duplicate_pmus(void *ps) { struct print_state *print_state = ps; @@ -503,6 +508,7 @@ int cmd_list(int argc, const char **argv) .print_end = json_print_end, .print_event = json_print_event, .print_metric = json_print_metric, + .skip_duplicate_pmus = json_skip_duplicate_pmus, }; ps = &json_ps; } else { -- cgit v1.2.3 From 41f6f64e6999a837048b1bd13a2f8742964eca6b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:39 -0800 Subject: bpf: support non-r10 register spill/fill to/from stack in precision tracking Use instruction (jump) history to record instructions that performed register spill/fill to/from stack, regardless if this was done through read-only r10 register, or any other register after copying r10 into it *and* potentially adjusting offset. To make this work reliably, we push extra per-instruction flags into instruction history, encoding stack slot index (spi) and stack frame number in extra 10 bit flags we take away from prev_idx in instruction history. We don't touch idx field for maximum performance, as it's checked most frequently during backtracking. This change removes basically the last remaining practical limitation of precision backtracking logic in BPF verifier. It fixes known deficiencies, but also opens up new opportunities to reduce number of verified states, explored in the subsequent patches. There are only three differences in selftests' BPF object files according to veristat, all in the positive direction (less states). File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) -------------------------------------- ------------- --------- --------- ------------- ---------- ---------- ------------- test_cls_redirect_dynptr.bpf.linked3.o cls_redirect 2987 2864 -123 (-4.12%) 240 231 -9 (-3.75%) xdp_synproxy_kern.bpf.linked3.o syncookie_tc 82848 82661 -187 (-0.23%) 5107 5073 -34 (-0.67%) xdp_synproxy_kern.bpf.linked3.o syncookie_xdp 85116 84964 -152 (-0.18%) 5162 5130 -32 (-0.62%) Note, I avoided renaming jmp_history to more generic insn_hist to minimize number of lines changed and potential merge conflicts between bpf and bpf-next trees. Notice also cur_hist_entry pointer reset to NULL at the beginning of instruction verification loop. This pointer avoids the problem of relying on last jump history entry's insn_idx to determine whether we already have entry for current instruction or not. It can happen that we added jump history entry because current instruction is_jmp_point(), but also we need to add instruction flags for stack access. In this case, we don't want to entries, so we need to reuse last added entry, if it is present. Relying on insn_idx comparison has the same ambiguity problem as the one that was fixed recently in [0], so we avoid that. [0] https://patchwork.kernel.org/project/netdevbpf/patch/20231110002638.4168352-3-andrii@kernel.org/ Acked-by: Eduard Zingerman Reported-by: Tao Lyu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 31 +++- kernel/bpf/verifier.c | 175 ++++++++++++--------- .../bpf/progs/verifier_subprog_precision.c | 23 ++- tools/testing/selftests/bpf/verifier/precise.c | 38 +++-- 4 files changed, 169 insertions(+), 98 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3378cc753061..bada59812e00 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -325,12 +325,34 @@ struct bpf_func_state { int allocated_stack; }; -struct bpf_idx_pair { - u32 prev_idx; +#define MAX_CALL_FRAMES 8 + +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +enum { + /* instruction references stack slot through PTR_TO_STACK register; + * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) + * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, + * 8 bytes per slot, so slot index (spi) is [0, 63]) + */ + INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ + + INSN_F_SPI_MASK = 0x3f, /* 6 bits */ + INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ + + INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ +}; + +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); + +struct bpf_jmp_history_entry { u32 idx; + /* insn idx can't be bigger than 1 million */ + u32 prev_idx : 22; + /* special flags, e.g., whether insn is doing register stack spill/load */ + u32 flags : 10; }; -#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -413,7 +435,7 @@ struct bpf_verifier_state { * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_idx_pair *jmp_history; + struct bpf_jmp_history_entry *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; @@ -656,6 +678,7 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1ed39665f802..9bc16dc66465 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1355,8 +1355,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, int i, err; dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, - src->jmp_history_cnt, sizeof(struct bpf_idx_pair), - GFP_USER); + src->jmp_history_cnt, sizeof(*dst_state->jmp_history), + GFP_USER); if (!dst_state->jmp_history) return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; @@ -3221,6 +3221,21 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return __check_reg_arg(env, state->regs, regno, t); } +static int insn_stack_access_flags(int frameno, int spi) +{ + return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno; +} + +static int insn_stack_access_spi(int insn_flags) +{ + return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK; +} + +static int insn_stack_access_frameno(int insn_flags) +{ + return insn_flags & INSN_F_FRAMENO_MASK; +} + static void mark_jmp_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].jmp_point = true; @@ -3232,28 +3247,51 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) } /* for any branch, call, exit record the history of jmps in the given state */ -static int push_jmp_history(struct bpf_verifier_env *env, - struct bpf_verifier_state *cur) +static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, + int insn_flags) { u32 cnt = cur->jmp_history_cnt; - struct bpf_idx_pair *p; + struct bpf_jmp_history_entry *p; size_t alloc_size; - if (!is_jmp_point(env, env->insn_idx)) + /* combine instruction flags if we already recorded this instruction */ + if (env->cur_hist_ent) { + /* atomic instructions push insn_flags twice, for READ and + * WRITE sides, but they should agree on stack slot + */ + WARN_ONCE((env->cur_hist_ent->flags & insn_flags) && + (env->cur_hist_ent->flags & insn_flags) != insn_flags, + "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n", + env->insn_idx, env->cur_hist_ent->flags, insn_flags); + env->cur_hist_ent->flags |= insn_flags; return 0; + } cnt++; alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); p = krealloc(cur->jmp_history, alloc_size, GFP_USER); if (!p) return -ENOMEM; - p[cnt - 1].idx = env->insn_idx; - p[cnt - 1].prev_idx = env->prev_insn_idx; cur->jmp_history = p; + + p = &cur->jmp_history[cnt - 1]; + p->idx = env->insn_idx; + p->prev_idx = env->prev_insn_idx; + p->flags = insn_flags; cur->jmp_history_cnt = cnt; + env->cur_hist_ent = p; + return 0; } +static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, + u32 hist_end, int insn_idx) +{ + if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) + return &st->jmp_history[hist_end - 1]; + return NULL; +} + /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. * Return -ENOENT if we exhausted all instructions within given state. @@ -3415,9 +3453,14 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) return bt->reg_masks[bt->frame] & (1 << reg); } +static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) +{ + return bt->stack_masks[frame] & (1ull << slot); +} + static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) { - return bt->stack_masks[bt->frame] & (1ull << slot); + return bt_is_frame_slot_set(bt, bt->frame, slot); } /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ @@ -3471,7 +3514,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); * - *was* processed previously during backtracking. */ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, - struct backtrack_state *bt) + struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3484,7 +3527,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, u8 mode = BPF_MODE(insn->code); u32 dreg = insn->dst_reg; u32 sreg = insn->src_reg; - u32 spi, i; + u32 spi, i, fr; if (insn->code == 0) return 0; @@ -3545,20 +3588,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * by 'precise' mark in corresponding register of this state. * No further tracking necessary. */ - if (insn->src_reg != BPF_REG_FP) + if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) return 0; - /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be * tracked with precision */ - spi = (-insn->off - 1) / BPF_REG_SIZE; - if (spi >= 64) { - verbose(env, "BUG spi %d\n", spi); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; - } - bt_set_slot(bt, spi); + spi = insn_stack_access_spi(hist->flags); + fr = insn_stack_access_frameno(hist->flags); + bt_set_frame_slot(bt, fr, spi); } else if (class == BPF_STX || class == BPF_ST) { if (bt_is_reg_set(bt, dreg)) /* stx & st shouldn't be using _scalar_ dst_reg @@ -3567,17 +3605,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, */ return -ENOTSUPP; /* scalars can only be spilled into stack */ - if (insn->dst_reg != BPF_REG_FP) + if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) return 0; - spi = (-insn->off - 1) / BPF_REG_SIZE; - if (spi >= 64) { - verbose(env, "BUG spi %d\n", spi); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; - } - if (!bt_is_slot_set(bt, spi)) + spi = insn_stack_access_spi(hist->flags); + fr = insn_stack_access_frameno(hist->flags); + if (!bt_is_frame_slot_set(bt, fr, spi)) return 0; - bt_clear_slot(bt, spi); + bt_clear_frame_slot(bt, fr, spi); if (class == BPF_STX) bt_set_reg(bt, sreg); } else if (class == BPF_JMP || class == BPF_JMP32) { @@ -3621,10 +3655,14 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - /* we don't track register spills perfectly, - * so fallback to force-precise instead of failing */ - if (bt_stack_mask(bt) != 0) - return -ENOTSUPP; + /* we are now tracking register spills correctly, + * so any instance of leftover slots is a bug + */ + if (bt_stack_mask(bt) != 0) { + verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)"); + return -EFAULT; + } /* propagate r1-r5 to the caller */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) { if (bt_is_reg_set(bt, i)) { @@ -3649,8 +3687,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - if (bt_stack_mask(bt) != 0) - return -ENOTSUPP; + if (bt_stack_mask(bt) != 0) { + verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)"); + return -EFAULT; + } /* clear r1-r5 in callback subprog's mask */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) bt_clear_reg(bt, i); @@ -4087,6 +4128,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; + struct bpf_jmp_history_entry *hist; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", @@ -4150,7 +4192,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) err = 0; skip_first = false; } else { - err = backtrack_insn(env, i, subseq_idx, bt); + hist = get_jmp_hist_entry(st, history, i); + err = backtrack_insn(env, i, subseq_idx, hist, bt); } if (err == -ENOTSUPP) { mark_all_scalars_precise(env, env->cur_state); @@ -4203,22 +4246,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { - /* the sequence of instructions: - * 2: (bf) r3 = r10 - * 3: (7b) *(u64 *)(r3 -8) = r0 - * 4: (79) r4 = *(u64 *)(r10 -8) - * doesn't contain jmps. It's backtracked - * as a single block. - * During backtracking insn 3 is not recognized as - * stack access, so at the end of backtracking - * stack slot fp-8 is still marked in stack_mask. - * However the parent state may not have accessed - * fp-8 and it's "unallocated" stack space. - * In such case fallback to conservative. - */ - mark_all_scalars_precise(env, env->cur_state); - bt_reset(bt); - return 0; + verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n", + i, func->allocated_stack / BPF_REG_SIZE); + WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)"); + return -EFAULT; } if (!is_spilled_scalar_reg(&func->stack[i])) { @@ -4391,7 +4422,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; struct bpf_reg_state *reg = NULL; - u32 dst_reg = insn->dst_reg; + int insn_flags = insn_stack_access_flags(state->frameno, spi); err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) @@ -4432,17 +4463,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { - if (dst_reg != BPF_REG_FP) { - /* The backtracking logic can only recognize explicit - * stack slot address like [fp - 8]. Other spill of - * scalar via different register has to be conservative. - * Backtrack from here and mark all registers as precise - * that contributed into 'reg' being a constant. - */ - err = mark_chain_precision(env, value_regno); - if (err) - return err; - } save_register_state(state, spi, reg, size); /* Break the relation on a narrowing spill. */ if (fls64(reg->umax_value) > BITS_PER_BYTE * size) @@ -4454,6 +4474,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); + insn_flags = 0; /* not a register spill */ } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -4499,9 +4520,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) - state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = - type; + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; + insn_flags = 0; /* not a register spill */ } + + if (insn_flags) + return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -4694,6 +4718,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; struct bpf_reg_state *reg; u8 *stype, type; + int insn_flags = insn_stack_access_flags(reg_state->frameno, spi); stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; @@ -4739,12 +4764,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, return -EACCES; } mark_reg_unknown(env, state->regs, dst_regno); + insn_flags = 0; /* not restoring original register state */ } state->regs[dst_regno].live |= REG_LIVE_WRITTEN; - return 0; - } - - if (dst_regno >= 0) { + } else if (dst_regno >= 0) { /* restore register state from stack */ copy_register_state(&state->regs[dst_regno], reg); /* mark reg as written since spilled pointer state likely @@ -4780,7 +4803,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); if (dst_regno >= 0) mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); + insn_flags = 0; /* we are not restoring spilled register */ } + if (insn_flags) + return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -6940,7 +6966,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); if (err) return err; - return 0; } @@ -16910,7 +16935,8 @@ hit: * the precision needs to be propagated back in * the current state. */ - err = err ? : push_jmp_history(env, cur); + if (is_jmp_point(env, env->insn_idx)) + err = err ? : push_jmp_history(env, cur, 0); err = err ? : propagate_precision(env, &sl->state); if (err) return err; @@ -17135,6 +17161,9 @@ static int do_check(struct bpf_verifier_env *env) u8 class; int err; + /* reset current history entry on each new instruction */ + env->cur_hist_ent = NULL; + env->prev_insn_idx = prev_insn_idx; if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", @@ -17174,7 +17203,7 @@ static int do_check(struct bpf_verifier_env *env) } if (is_jmp_point(env, env->insn_idx)) { - err = push_jmp_history(env, state); + err = push_jmp_history(env, state, 0); if (err) return err; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 0dfe3f8b69ac..eba98fab2f54 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -589,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void) SEC("?raw_tp") __success __log_level(2) -/* precision backtracking can't currently handle stack access not through r10, - * so we won't be able to mark stack slot fp-8 as precise, and so will - * fallback to forcing all as precise - */ -__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__msg("10: (0f) r1 += r7") +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)") +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2") +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2") +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6") +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") __naked int subprog_spill_into_parent_stack_slot_precise(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0d84dd1f38b6..8a2ff81d8350 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -140,10 +140,11 @@ .result = REJECT, }, { - "precise: ST insn causing spi > allocated_stack", + "precise: ST zero to stack insn is supported", .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + /* not a register spill, so we stop precision propagation for R4 here */ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_0, -1), @@ -157,11 +158,11 @@ mark_precise: frame0: last_idx 4 first_idx 2\ mark_precise: frame0: regs=r4 stack= before 4\ mark_precise: frame0: regs=r4 stack= before 3\ - mark_precise: frame0: regs= stack=-8 before 2\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ mark_precise: frame0: last_idx 5 first_idx 5\ - mark_precise: frame0: parent state regs= stack=:", + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r0 stack= before 4\ + 5: R0=-1 R4=0", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -169,6 +170,8 @@ "precise: STX insn causing spi > allocated_stack", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + /* make later reg spill more interesting by having somewhat known scalar */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), @@ -179,18 +182,21 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs=r4 stack=:\ - mark_precise: frame0: last_idx 5 first_idx 3\ - mark_precise: frame0: regs=r4 stack= before 5\ - mark_precise: frame0: regs=r4 stack= before 4\ - mark_precise: frame0: regs= stack=-8 before 3\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: last_idx 6 first_idx 4\ + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 3 first_idx 3\ + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 0 first_idx 0\ + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ + mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, -- cgit v1.2.3 From 876301881c436bf38e83a2c0d276a24b642e4aab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:40 -0800 Subject: selftests/bpf: add stack access precision test Add a new selftests that validates precision tracking for stack access instruction, using both r10-based and non-r10-based accesses. For non-r10 ones we also make sure to have non-zero var_off to validate that final stack offset is tracked properly in instruction history information inside verifier. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_subprog_precision.c | 64 ++++++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index eba98fab2f54..6f5d19665cf6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -641,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void) ); } -__naked __noinline __used -static __u64 subprog_with_checkpoint(void) +SEC("?raw_tp") +__success __log_level(2) +__msg("17: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") +__naked int stack_slot_aliases_precision(void) { asm volatile ( - "r0 = 0;" - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ - "goto +0;" + "r6 = 1;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call identity_subprog;" + /* let's setup two registers that are aliased to r10 */ + "r7 = r10;" + "r7 += -8;" /* r7 = r10 - 8 */ + "r8 = r10;" + "r8 += -32;" /* r8 = r10 - 32 */ + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) + * a few times through different stack pointer regs, making + * sure to use r10, r7, and r8 both in LDX and STX insns, and + * *importantly* also using a combination of const var_off and + * insn->off to validate that we record final stack slot + * correctly, instead of relying on just insn->off derivation, + * which is only valid for r10-based stack offset + */ + "*(u64 *)(r10 - 16) = r0;" + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ + "r0 = *(u64 *)(r8 + 16);" + "*(u64 *)(r7 - 8) = r0;" + "r0 = *(u64 *)(r10 - 16);" + /* get ready to use r0 as an index into array to force precision */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the beginning, including through the + * subprog call and all the stack spills and loads + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" ); } -- cgit v1.2.3 From b33ceb6a3d2ee07fdd836373383a6d4783581324 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:43 -0800 Subject: selftests/bpf: validate STACK_ZERO is preserved on subreg spill Add tests validating that STACK_ZERO slots are preserved when slot is partially overwritten with subregister spill. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 6115520154e3..d9dabae81176 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -4,6 +4,7 @@ #include #include #include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> struct { __uint(type, BPF_MAP_TYPE_RINGBUF); @@ -450,4 +451,43 @@ l0_%=: r1 >>= 16; \ : __clobber_all); } +SEC("raw_tp") +__log_level(2) +__success +__msg("fp-8=0m??mmmm") +__msg("fp-16=00mm??mm") +__msg("fp-24=00mm???m") +__naked void spill_subregs_preserve_stack_zero(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + + /* 32-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */ + "*(u8 *)(r10 -2) = r0;" /* MISC */ + /* fp-3 and fp-4 stay INVALID */ + "*(u32 *)(r10 -8) = r0;" + + /* 16-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */ + "*(u16 *)(r10 -12) = r0;" /* MISC */ + /* fp-13 and fp-14 stay INVALID */ + "*(u16 *)(r10 -16) = r0;" + + /* 8-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r18 -10) = 0; */ + "*(u16 *)(r10 -20) = r0;" /* MISC */ + /* fp-21, fp-22, and fp-23 stay INVALID */ + "*(u8 *)(r10 -24) = r0;" + + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)), + __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)), + __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From add1cd7f22e61756987865ada9fe95cd86569025 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:45 -0800 Subject: selftests/bpf: validate zero preservation for sub-slot loads Validate that 1-, 2-, and 4-byte loads from stack slots not aligned on 8-byte boundary still preserve zero, when loading from all-STACK_ZERO sub-slots, or when stack sub-slots are covered by spilled register with known constant zero value. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 71 ++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index d9dabae81176..41fd61299eab 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -490,4 +490,75 @@ __naked void spill_subregs_preserve_stack_zero(void) : __clobber_all); } +char single_byte_buf[1] SEC(".data.single_byte_buf"); + +SEC("raw_tp") +__log_level(2) +__success +__naked void partial_stack_load_preserves_zeros(void) +{ + asm volatile ( + /* fp-8 is all STACK_ZERO */ + ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ + + /* fp-16 is const zero register */ + "r0 = 0;" + "*(u64 *)(r10 -16) = r0;" + + /* load single U8 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U8 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -9);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -10);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -12);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0)) + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 064e0bea19b356c5d5f48a4549d80a3c03ce898b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:47 -0800 Subject: selftests/bpf: validate precision logic in partial_stack_load_preserves_zeros Enhance partial_stack_load_preserves_zeros subtest with detailed precision propagation log checks. We know expect fp-16 to be spilled, initially imprecise, zero const register, which is later marked as precise even when partial stack slot load is performed, even if it's not a register fill (!). Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 41fd61299eab..df4920da3472 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -495,6 +495,22 @@ char single_byte_buf[1] SEC(".data.single_byte_buf"); SEC("raw_tp") __log_level(2) __success +/* make sure fp-8 is all STACK_ZERO */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +/* and now check that precision propagation works even for such tricky case */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=P0 R10=fp0 fp-16_w=0") +__msg("11: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") __naked void partial_stack_load_preserves_zeros(void) { asm volatile ( -- cgit v1.2.3 From e136735f0c263b8bb9572ff8d35d8d5bc9fd9b66 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:10:05 -0800 Subject: tools: pynl: make flags argument optional for do() Commit 1768d8a767f8 ("tools/net/ynl: Add support for create flags") added support for setting legacy netlink CRUD flags on netlink messages (NLM_F_REPLACE, _EXCL, _CREATE etc.). Most of genetlink won't need these, don't force callers to pass in an empty argument to each do() call. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231202211005.341613-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 92995bca14e1..c56dad9593c6 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -705,7 +705,7 @@ class YnlFamily(SpecFamily): return op['do']['request']['attributes'].copy() - def _op(self, method, vals, flags, dump=False): + def _op(self, method, vals, flags=None, dump=False): op = self.ops[method] nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK @@ -769,7 +769,7 @@ class YnlFamily(SpecFamily): return rsp[0] return rsp - def do(self, method, vals, flags): + def do(self, method, vals, flags=None): return self._op(method, vals, flags) def dump(self, method, vals): -- cgit v1.2.3 From f2d4d9ad809a1c7173ce66c47cd45e11184a554c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:13:10 -0800 Subject: tools: ynl: use strerror() if no extack of note provided If kernel didn't give use any meaningful error - print a strerror() to the ynl error message. Reviewed-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20231202211310.342716-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 830d25097009..587286de10b5 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -145,8 +145,10 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, const struct nlattr *attr; const char *str = NULL; - if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) { + yerr_msg(ys, "%s", strerror(ys->err.code)); return MNL_CB_OK; + } mnl_attr_for_each(attr, nlh, hlen) { unsigned int len, type; @@ -249,6 +251,8 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, yerr_msg(ys, "Kernel %s: %s%s", ys->err.code ? "error" : "warning", bad_attr, miss_attr); + else + yerr_msg(ys, "%s", strerror(ys->err.code)); return MNL_CB_OK; } -- cgit v1.2.3 From f3c928008ab218055e26917ffdbdaf429e8e616a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:12:25 -0800 Subject: tools: ynl: move private definitions to a separate header ynl.h has a growing amount of "internal" stuff, which may confuse users who try to take a look at the external API. Currently the internals are at the bottom of the file with a banner in between, but this arrangement makes it hard to add external APIs / inline helpers which need internal definitions. Move internals to a separate header. Link: https://lore.kernel.org/r/20231202211225.342466-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 144 +++++++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.h | 148 +------------------------------------------ 2 files changed, 145 insertions(+), 147 deletions(-) create mode 100644 tools/net/ynl/lib/ynl-priv.h (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h new file mode 100644 index 000000000000..7491da8e7555 --- /dev/null +++ b/tools/net/ynl/lib/ynl-priv.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +#ifndef __YNL_C_PRIV_H +#define __YNL_C_PRIV_H 1 + +#include +#include +#include + +/* + * YNL internals / low level stuff + */ + +/* Generic mnl helper code */ + +enum ynl_policy_type { + YNL_PT_REJECT = 1, + YNL_PT_IGNORE, + YNL_PT_NEST, + YNL_PT_FLAG, + YNL_PT_BINARY, + YNL_PT_U8, + YNL_PT_U16, + YNL_PT_U32, + YNL_PT_U64, + YNL_PT_UINT, + YNL_PT_NUL_STR, + YNL_PT_BITFIELD32, +}; + +struct ynl_policy_attr { + enum ynl_policy_type type; + unsigned int len; + const char *name; + struct ynl_policy_nest *nest; +}; + +struct ynl_policy_nest { + unsigned int max_attr; + struct ynl_policy_attr *table; +}; + +struct ynl_parse_arg { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *data; +}; + +struct ynl_dump_list_type { + struct ynl_dump_list_type *next; + unsigned char data[] __attribute__((aligned(8))); +}; +extern struct ynl_dump_list_type *YNL_LIST_END; + +static inline bool ynl_dump_obj_is_last(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + + uptr -= offsetof(struct ynl_dump_list_type, data); + return uptr == (unsigned long)YNL_LIST_END; +} + +static inline void *ynl_dump_obj_next(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + struct ynl_dump_list_type *list; + + uptr -= offsetof(struct ynl_dump_list_type, data); + list = (void *)uptr; + uptr = (unsigned long)list->next; + uptr += offsetof(struct ynl_dump_list_type, data); + + return (void *)uptr; +} + +struct ynl_ntf_base_type { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct ynl_ntf_base_type *ntf); + unsigned char data[] __attribute__((aligned(8))); +}; + +extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; + +struct nlmsghdr * +ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); +struct nlmsghdr * +ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); + +int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); + +int ynl_recv_ack(struct ynl_sock *ys, int ret); +int ynl_cb_null(const struct nlmsghdr *nlh, void *data); + +/* YNL specific helpers used by the auto-generated code */ + +struct ynl_req_state { + struct ynl_parse_arg yarg; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_dump_state { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *first; + struct ynl_dump_list_type *last; + size_t alloc_sz; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_ntf_info { + struct ynl_policy_nest *policy; + mnl_cb_t cb; + size_t alloc_sz; + void (*free)(struct ynl_ntf_base_type *ntf); +}; + +int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_req_state *yrs); +int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_dump_state *yds); + +void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); +int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); + +#ifndef MNL_HAS_AUTO_SCALARS +static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) +{ + if (mnl_attr_get_payload_len(attr) == 4) + return mnl_attr_get_u32(attr); + return mnl_attr_get_u64(attr); +} + +static inline void +mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data) +{ + if ((uint32_t)data == (uint64_t)data) + return mnl_attr_put_u32(nlh, type, data); + return mnl_attr_put_u64(nlh, type, data); +} +#endif +#endif diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 075d868f3b57..5de580b992b8 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -3,20 +3,10 @@ #define __YNL_C_H 1 #include -#include #include #include -struct mnl_socket; -struct nlmsghdr; - -/* - * User facing code - */ - -struct ynl_ntf_base_type; -struct ynl_ntf_info; -struct ynl_sock; +#include "ynl-priv.h" enum ynl_error_code { YNL_ERROR_NONE = 0, @@ -116,140 +106,4 @@ static inline bool ynl_has_ntf(struct ynl_sock *ys) struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys); void ynl_ntf_free(struct ynl_ntf_base_type *ntf); - -/* - * YNL internals / low level stuff - */ - -/* Generic mnl helper code */ - -enum ynl_policy_type { - YNL_PT_REJECT = 1, - YNL_PT_IGNORE, - YNL_PT_NEST, - YNL_PT_FLAG, - YNL_PT_BINARY, - YNL_PT_U8, - YNL_PT_U16, - YNL_PT_U32, - YNL_PT_U64, - YNL_PT_UINT, - YNL_PT_NUL_STR, - YNL_PT_BITFIELD32, -}; - -struct ynl_policy_attr { - enum ynl_policy_type type; - unsigned int len; - const char *name; - struct ynl_policy_nest *nest; -}; - -struct ynl_policy_nest { - unsigned int max_attr; - struct ynl_policy_attr *table; -}; - -struct ynl_parse_arg { - struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; - void *data; -}; - -struct ynl_dump_list_type { - struct ynl_dump_list_type *next; - unsigned char data[] __attribute__((aligned(8))); -}; -extern struct ynl_dump_list_type *YNL_LIST_END; - -static inline bool ynl_dump_obj_is_last(void *obj) -{ - unsigned long uptr = (unsigned long)obj; - - uptr -= offsetof(struct ynl_dump_list_type, data); - return uptr == (unsigned long)YNL_LIST_END; -} - -static inline void *ynl_dump_obj_next(void *obj) -{ - unsigned long uptr = (unsigned long)obj; - struct ynl_dump_list_type *list; - - uptr -= offsetof(struct ynl_dump_list_type, data); - list = (void *)uptr; - uptr = (unsigned long)list->next; - uptr += offsetof(struct ynl_dump_list_type, data); - - return (void *)uptr; -} - -struct ynl_ntf_base_type { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ynl_ntf_base_type *ntf); - unsigned char data[] __attribute__((aligned(8))); -}; - -extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; - -struct nlmsghdr * -ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); -struct nlmsghdr * -ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); - -int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); - -int ynl_recv_ack(struct ynl_sock *ys, int ret); -int ynl_cb_null(const struct nlmsghdr *nlh, void *data); - -/* YNL specific helpers used by the auto-generated code */ - -struct ynl_req_state { - struct ynl_parse_arg yarg; - mnl_cb_t cb; - __u32 rsp_cmd; -}; - -struct ynl_dump_state { - struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; - void *first; - struct ynl_dump_list_type *last; - size_t alloc_sz; - mnl_cb_t cb; - __u32 rsp_cmd; -}; - -struct ynl_ntf_info { - struct ynl_policy_nest *policy; - mnl_cb_t cb; - size_t alloc_sz; - void (*free)(struct ynl_ntf_base_type *ntf); -}; - -int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, - struct ynl_req_state *yrs); -int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, - struct ynl_dump_state *yds); - -void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); -int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); - -#ifndef MNL_HAS_AUTO_SCALARS -static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) -{ - if (mnl_attr_get_payload_len(attr) == 4) - return mnl_attr_get_u32(attr); - return mnl_attr_get_u64(attr); -} - -static inline void -mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data) -{ - if ((uint32_t)data == (uint64_t)data) - return mnl_attr_put_u32(nlh, type, data); - return mnl_attr_put_u64(nlh, type, data); -} -#endif #endif -- cgit v1.2.3 From eb2eac0c7b6180332ced94d2979f3d3c7d22aaff Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 20 Nov 2023 16:04:20 -0800 Subject: perf evsel: Fallback to "task-clock" when not system wide When the "cycles" event isn't available evsel will fallback to the "cpu-clock" software event. "task-clock" is similar to "cpu-clock" but only runs when the process is running. Falling back to "cpu-clock" when not system wide leads to confusion, by falling back to "task-clock" it is hoped the confusion is less. Pass the target to determine if "task-clock" is more appropriate. Update a nearby comment and debug string for the change. Signed-off-by: Ian Rogers Tested-by: Athira Rajeev Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Ajay Kaher Cc: Alexander Shishkin Cc: Alexey Makhalov Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Yang Jihong Link: https://lore.kernel.org/r/20231121000420.368075-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-stat.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/evsel.c | 18 ++++++++++-------- tools/perf/util/evsel.h | 3 ++- 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9b4f3805ca92..db814eadc16b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1360,7 +1360,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a3af805a1d57..d8e5d6f7a87a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -653,7 +653,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if ((evsel__leader(counter) != counter) || !(counter->core.leader->nr_members > 1)) return COUNTER_SKIP; - } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ea8c7eca5eee..1e42bd1c7d5a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1044,7 +1044,7 @@ try_again: perf_top_overwrite_fallback(top, counter)) goto try_again; - if (evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (evsel__fallback(counter, &opts->target, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a5da74e3a517..532f34d9fcb5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2853,7 +2853,8 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const #endif -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize) { int paranoid; @@ -2861,18 +2862,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.type == PERF_TYPE_HARDWARE && evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* - * If it's cycles then fall back to hrtimer based - * cpu-clock-tick sw counter, which is always available even if - * no PMU support. + * If it's cycles then fall back to hrtimer based cpu-clock sw + * counter, which is always available even if no PMU support. * * PPC returns ENXIO until 2.6.37 (behavior changed with commit * b0a873e). */ - scnprintf(msg, msgsize, "%s", -"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); - evsel->core.attr.type = PERF_TYPE_SOFTWARE; - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; + evsel->core.attr.config = target__has_cpu(target) + ? PERF_COUNT_SW_CPU_CLOCK + : PERF_COUNT_SW_TASK_CLOCK; + scnprintf(msg, msgsize, + "The cycles event is not supported, trying to fall back to %s", + target__has_cpu(target) ? "cpu-clock" : "task-clock"); zfree(&evsel->name); return true; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f19ac9f027ef..efbb6e848287 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -460,7 +460,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel) evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize); int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size); -- cgit v1.2.3 From 030ac3cad28992ae9099a857848861053273cc8f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:20 -0800 Subject: perf record: Be lazier in allocating lost samples buffer Wait until a lost sample occurs to allocate the lost samples buffer, often the buffer isn't necessary. This saves a 64kb allocation and 5.3kb of peak memory consumption. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index db814eadc16b..eb5a398ddb1d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1924,21 +1924,13 @@ static void __record__save_lost_samples(struct record *rec, struct evsel *evsel, static void record__read_lost_samples(struct record *rec) { struct perf_session *session = rec->session; - struct perf_record_lost_samples *lost; + struct perf_record_lost_samples *lost = NULL; struct evsel *evsel; /* there was an error during record__open */ if (session->evlist == NULL) return; - lost = zalloc(PERF_SAMPLE_MAX_SIZE); - if (lost == NULL) { - pr_debug("Memory allocation failed\n"); - return; - } - - lost->header.type = PERF_RECORD_LOST_SAMPLES; - evlist__for_each_entry(session->evlist, evsel) { struct xyarray *xy = evsel->core.sample_id; u64 lost_count; @@ -1961,6 +1953,14 @@ static void record__read_lost_samples(struct record *rec) } if (count.lost) { + if (!lost) { + lost = zalloc(PERF_SAMPLE_MAX_SIZE); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, x, y, count.lost, 0); } @@ -1968,9 +1968,18 @@ static void record__read_lost_samples(struct record *rec) } lost_count = perf_bpf_filter__lost_count(evsel); - if (lost_count) + if (lost_count) { + if (!lost) { + lost = zalloc(PERF_SAMPLE_MAX_SIZE); + if (!lost) { + pr_debug("Memory allocation failed\n"); + return; + } + lost->header.type = PERF_RECORD_LOST_SAMPLES; + } __record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count, PERF_RECORD_MISC_LOST_SAMPLES_BPF); + } } out: free(lost); -- cgit v1.2.3 From d0acce68285e8645038a72c6792483160ca36e5a Mon Sep 17 00:00:00 2001 From: Chengen Du Date: Thu, 30 Nov 2023 21:57:23 +0800 Subject: perf symbols: Parse NOTE segments until the build id is found In the ELF file, multiple NOTE segments may exist. To locate the build id, the process shall persist in parsing NOTE segments until the build id is found. Signed-off-by: Chengen Du Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231130135723.17562-1-chengen.du@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-minimal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index a81a14769bd1..1da8b713509c 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -159,9 +159,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } else { Elf64_Ehdr ehdr; @@ -210,9 +211,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out_free; ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) + if (ret == 0) { ret = bid->size; - break; + break; + } } } out_free: -- cgit v1.2.3 From 407a3898d72ee0020b8cc9dd28b88c8eb8d68214 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 5 Dec 2023 08:49:24 -0800 Subject: perf test shell diff: Skip test if test_loop symbol is missing in the perf binary The diff test depends on finding the symbol test_loop in perf and will fail if perf has been stripped and no debug object is available. In that case, skip the test instead. Suggested-by: Adrian Hunter Signed-off-by: Ian Rogers Tested-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231205164924.835682-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/diff.sh | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/shell/diff.sh b/tools/perf/tests/shell/diff.sh index 213185763688..14b87af88703 100755 --- a/tools/perf/tests/shell/diff.sh +++ b/tools/perf/tests/shell/diff.sh @@ -9,8 +9,15 @@ perfdata1=$(mktemp /tmp/__perf_test.perf.data.XXXXX) perfdata2=$(mktemp /tmp/__perf_test.perf.data.XXXXX) perfdata3=$(mktemp /tmp/__perf_test.perf.data.XXXXX) testprog="perf test -w thloop" + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + testsym="test_loop" +skip_test_missing_symbol ${testsym} + cleanup() { rm -rf "${perfdata1}" rm -rf "${perfdata1}".old -- cgit v1.2.3 From 9fa688ea341231837915f996b61f6e0a330d3b38 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:24 -0800 Subject: perf map: Simplify map_ip/unmap_ip and make 'struct map' smaller When mapping an IP it is either an identity mapping or a DSO relative mapping, so a single bit is required in the struct to identify this. The current code uses function pointers, adding 2 pointers per map and also pushing the size of a map beyond 1 cache line. Switch to using a byte to identify the mapping type (as well as priv and erange_warned), to avoid any masking. Change struct maps's layout to avoid holes. Before: ``` struct map { u64 start; /* 0 8 */ u64 end; /* 8 8 */ _Bool erange_warned:1; /* 16: 0 1 */ _Bool priv:1; /* 16: 1 1 */ /* XXX 6 bits hole, try to pack */ /* XXX 3 bytes hole, try to pack */ u32 prot; /* 20 4 */ u64 pgoff; /* 24 8 */ u64 reloc; /* 32 8 */ u64 (*map_ip)(const struct map *, u64); /* 40 8 */ u64 (*unmap_ip)(const struct map *, u64); /* 48 8 */ struct dso * dso; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ refcount_t refcnt; /* 64 4 */ u32 flags; /* 68 4 */ /* size: 72, cachelines: 2, members: 12 */ /* sum members: 68, holes: 1, sum holes: 3 */ /* sum bitfield members: 2 bits, bit holes: 1, sum bit holes: 6 bits */ /* last cacheline: 8 bytes */ }; ``` After: ``` struct map { u64 start; /* 0 8 */ u64 end; /* 8 8 */ u64 pgoff; /* 16 8 */ u64 reloc; /* 24 8 */ struct dso * dso; /* 32 8 */ refcount_t refcnt; /* 40 4 */ u32 prot; /* 44 4 */ u32 flags; /* 48 4 */ enum mapping_type mapping_type:8; /* 52: 0 4 */ /* Bitfield combined with next fields */ _Bool erange_warned; /* 53 1 */ _Bool priv; /* 54 1 */ /* size: 56, cachelines: 1, members: 11 */ /* padding: 1 */ /* last cacheline: 56 bytes */ }; ``` Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +- tools/perf/util/map.c | 20 ++--------- tools/perf/util/map.h | 83 +++++++++++++++++++++++--------------------- tools/perf/util/symbol-elf.c | 6 ++-- tools/perf/util/symbol.c | 6 ++-- 5 files changed, 50 insertions(+), 68 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a985d004aa8d..c5de5363b5e7 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1359,8 +1359,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) if (machine->vmlinux_map == NULL) return -ENOMEM; - map__set_map_ip(machine->vmlinux_map, identity__map_ip); - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f64b83004421..54c67cb7ecef 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -109,8 +109,7 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) map__set_pgoff(map, pgoff); map__set_reloc(map, 0); map__set_dso(map, dso__get(dso)); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); map__set_erange_warned(map, false); refcount_set(map__refcnt(map), 1); } @@ -172,7 +171,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map__init(result, start, start + len, pgoff, dso); if (anon || no_dso) { - map->map_ip = map->unmap_ip = identity__map_ip; + map->mapping_type = MAPPING_TYPE__IDENTITY; /* * Set memory without DSO as loaded. All map__find_* @@ -630,18 +629,3 @@ struct maps *map__kmaps(struct map *map) } return kmap->kmaps; } - -u64 map__dso_map_ip(const struct map *map, u64 ip) -{ - return ip - map__start(map) + map__pgoff(map); -} - -u64 map__dso_unmap_ip(const struct map *map, u64 ip) -{ - return ip + map__start(map) - map__pgoff(map); -} - -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) -{ - return ip; -} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1b53d53adc86..3a3b7757da5f 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -16,23 +16,25 @@ struct dso; struct maps; struct machine; +enum mapping_type { + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ + MAPPING_TYPE__DSO, + /* map__map_ip/map__unmap_ip are just the given ip value. */ + MAPPING_TYPE__IDENTITY, +}; + DECLARE_RC_STRUCT(map) { u64 start; u64 end; - bool erange_warned:1; - bool priv:1; - u32 prot; u64 pgoff; u64 reloc; - - /* ip -> dso rip */ - u64 (*map_ip)(const struct map *, u64); - /* dso rip -> ip */ - u64 (*unmap_ip)(const struct map *, u64); - struct dso *dso; refcount_t refcnt; + u32 prot; u32 flags; + enum mapping_type mapping_type:8; + bool erange_warned; + bool priv; }; struct kmap; @@ -41,38 +43,11 @@ struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct maps *map__kmaps(struct map *map); -/* ip -> dso rip */ -u64 map__dso_map_ip(const struct map *map, u64 ip); -/* dso rip -> ip */ -u64 map__dso_unmap_ip(const struct map *map, u64 ip); -/* Returns ip */ -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); - static inline struct dso *map__dso(const struct map *map) { return RC_CHK_ACCESS(map)->dso; } -static inline u64 map__map_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->map_ip(map, ip); -} - -static inline u64 map__unmap_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); -} - -static inline void *map__map_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->map_ip; -} - -static inline void* map__unmap_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->unmap_ip; -} - static inline u64 map__start(const struct map *map) { return RC_CHK_ACCESS(map)->start; @@ -123,6 +98,34 @@ static inline size_t map__size(const struct map *map) return map__end(map) - map__start(map); } +/* ip -> dso rip */ +static inline u64 map__dso_map_ip(const struct map *map, u64 ip) +{ + return ip - map__start(map) + map__pgoff(map); +} + +/* dso rip -> ip */ +static inline u64 map__dso_unmap_ip(const struct map *map, u64 ip) +{ + return ip + map__start(map) - map__pgoff(map); +} + +static inline u64 map__map_ip(const struct map *map, u64 ip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_map_ip(map, ip); + else + return ip; +} + +static inline u64 map__unmap_ip(const struct map *map, u64 ip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_unmap_ip(map, ip); + else + return ip; +} + /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); @@ -294,13 +297,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso) RC_CHK_ACCESS(map)->dso = dso; } -static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) +static inline void map__set_mapping_type(struct map *map, enum mapping_type type) { - RC_CHK_ACCESS(map)->map_ip = map_ip; + RC_CHK_ACCESS(map)->mapping_type = type; } -static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) +static inline enum mapping_type map__mapping_type(struct map *map) { - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; + return RC_CHK_ACCESS(map)->mapping_type; } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e7eeaf616b8..4b934ed3bfd1 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1392,8 +1392,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); map__set_end(map, map__start(map) + shdr->sh_size); map__set_pgoff(map, shdr->sh_offset); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); /* Ensure maps are correctly ordered */ if (kmaps) { int err; @@ -1455,8 +1454,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); map__set_pgoff(curr_map, shdr->sh_offset); } else { - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } curr_dso->symtab_type = dso->symtab_type; if (maps__insert(kmaps, curr_map)) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 82cc74b9358e..314c0263bf3c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -956,8 +956,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; } - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); if (maps__insert(kmaps, curr_map)) { dso__put(ndso); return -1; @@ -1475,8 +1474,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__set_start(map, map__start(new_map)); map__set_end(map, map__end(new_map)); map__set_pgoff(map, map__pgoff(new_map)); - map__set_map_ip(map, map__map_ip_ptr(new_map)); - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); + map__set_mapping_type(map, map__mapping_type(new_map)); /* Ensure maps are correctly ordered */ map_ref = map__get(map); maps__remove(kmaps, map_ref); -- cgit v1.2.3 From 0f6ab6a3fb7e380a1277f8288f315724ed517114 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:25 -0800 Subject: perf maps: Move symbol maps functions to maps.c Move the find and certain other symbol maps__* functions to maps.c for better abstraction. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 238 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/maps.h | 12 +++ tools/perf/util/symbol.c | 248 ----------------------------------------------- tools/perf/util/symbol.h | 1 - 4 files changed, 250 insertions(+), 249 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 233438c95b53..9a011aed4b75 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -475,3 +475,241 @@ struct map_rb_node *map_rb_node__next(struct map_rb_node *node) return rb_entry(next, struct map_rb_node, rb_node); } + +static int map__strcmp(const void *a, const void *b) +{ + const struct map *map_a = *(const struct map **)a; + const struct map *map_b = *(const struct map **)b; + const struct dso *dso_a = map__dso(map_a); + const struct dso *dso_b = map__dso(map_b); + int ret = strcmp(dso_a->short_name, dso_b->short_name); + + if (ret == 0 && map_a != map_b) { + /* + * Ensure distinct but name equal maps have an order in part to + * aid reference counting. + */ + ret = (int)map__start(map_a) - (int)map__start(map_b); + if (ret == 0) + ret = (int)((intptr_t)map_a - (intptr_t)map_b); + } + + return ret; +} + +static int map__strcmp_name(const void *name, const void *b) +{ + const struct dso *dso = map__dso(*(const struct map **)b); + + return strcmp(name, dso->short_name); +} + +void __maps__sort_by_name(struct maps *maps) +{ + qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); +} + +static int map__groups__sort_by_name_from_rbtree(struct maps *maps) +{ + struct map_rb_node *rb_node; + struct map **maps_by_name = realloc(maps__maps_by_name(maps), + maps__nr_maps(maps) * sizeof(struct map *)); + int i = 0; + + if (maps_by_name == NULL) + return -1; + + up_read(maps__lock(maps)); + down_write(maps__lock(maps)); + + RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; + RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); + + maps__for_each_entry(maps, rb_node) + maps_by_name[i++] = map__get(rb_node->map); + + __maps__sort_by_name(maps); + + up_write(maps__lock(maps)); + down_read(maps__lock(maps)); + + return 0; +} + +static struct map *__maps__find_by_name(struct maps *maps, const char *name) +{ + struct map **mapp; + + if (maps__maps_by_name(maps) == NULL && + map__groups__sort_by_name_from_rbtree(maps)) + return NULL; + + mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), + sizeof(*mapp), map__strcmp_name); + if (mapp) + return *mapp; + return NULL; +} + +struct map *maps__find_by_name(struct maps *maps, const char *name) +{ + struct map_rb_node *rb_node; + struct map *map; + + down_read(maps__lock(maps)); + + + if (RC_CHK_ACCESS(maps)->last_search_by_name) { + const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); + + if (strcmp(dso->short_name, name) == 0) { + map = RC_CHK_ACCESS(maps)->last_search_by_name; + goto out_unlock; + } + } + /* + * If we have maps->maps_by_name, then the name isn't in the rbtree, + * as maps->maps_by_name mirrors the rbtree when lookups by name are + * made. + */ + map = __maps__find_by_name(maps, name); + if (map || maps__maps_by_name(maps) != NULL) + goto out_unlock; + + /* Fallback to traversing the rbtree... */ + maps__for_each_entry(maps, rb_node) { + struct dso *dso; + + map = rb_node->map; + dso = map__dso(map); + if (strcmp(dso->short_name, name) == 0) { + RC_CHK_ACCESS(maps)->last_search_by_name = map; + goto out_unlock; + } + } + map = NULL; + +out_unlock: + up_read(maps__lock(maps)); + return map; +} + +void maps__fixup_end(struct maps *maps) +{ + struct map_rb_node *prev = NULL, *curr; + + down_write(maps__lock(maps)); + + maps__for_each_entry(maps, curr) { + if (prev != NULL && !map__end(prev->map)) + map__set_end(prev->map, map__start(curr->map)); + + prev = curr; + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + if (curr && !map__end(curr->map)) + map__set_end(curr->map, ~0ULL); + + up_write(maps__lock(maps)); +} + +/* + * Merges map into maps by splitting the new map within the existing map + * regions. + */ +int maps__merge_in(struct maps *kmaps, struct map *new_map) +{ + struct map_rb_node *rb_node; + LIST_HEAD(merged); + int err = 0; + + maps__for_each_entry(kmaps, rb_node) { + struct map *old_map = rb_node->map; + + /* no overload with this one */ + if (map__end(new_map) < map__start(old_map) || + map__start(new_map) >= map__end(old_map)) + continue; + + if (map__start(new_map) < map__start(old_map)) { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new......| -> |new..| + * |old....| -> |old....| + */ + map__set_end(new_map, map__start(old_map)); + } else { + /* + * |new.............| -> |new..| |new..| + * |old....| -> |old....| + */ + struct map_list_node *m = map_list_node__new(); + + if (!m) { + err = -ENOMEM; + goto out; + } + + m->map = map__clone(new_map); + if (!m->map) { + free(m); + err = -ENOMEM; + goto out; + } + + map__set_end(m->map, map__start(old_map)); + list_add_tail(&m->node, &merged); + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } else { + /* + * |new...... + * |old.... + */ + if (map__end(new_map) < map__end(old_map)) { + /* + * |new..| -> x + * |old.........| -> |old.........| + */ + map__put(new_map); + new_map = NULL; + break; + } else { + /* + * |new......| -> |new...| + * |old....| -> |old....| + */ + map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); + map__set_start(new_map, map__end(old_map)); + } + } + } + +out: + while (!list_empty(&merged)) { + struct map_list_node *old_node; + + old_node = list_entry(merged.next, struct map_list_node, node); + list_del_init(&old_node->node); + if (!err) + err = maps__insert(kmaps, old_node->map); + map__put(old_node->map); + free(old_node); + } + + if (new_map) { + if (!err) + err = maps__insert(kmaps, new_map); + map__put(new_map); + } + return err; +} diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 83144e0645ed..a689149be8c4 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -21,6 +21,16 @@ struct map_rb_node { struct map *map; }; +struct map_list_node { + struct list_head node; + struct map *map; +}; + +static inline struct map_list_node *map_list_node__new(void) +{ + return malloc(sizeof(struct map_list_node)); +} + struct map_rb_node *maps__first(struct maps *maps); struct map_rb_node *map_rb_node__next(struct map_rb_node *node); struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); @@ -133,4 +143,6 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map); void __maps__sort_by_name(struct maps *maps); +void maps__fixup_end(struct maps *maps); + #endif // __PERF_MAPS_H diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 314c0263bf3c..1cc42b8d8afb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,11 +48,6 @@ static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; char **vmlinux_path; -struct map_list_node { - struct list_head node; - struct map *map; -}; - struct symbol_conf symbol_conf = { .nanosecs = false, .use_modules = true, @@ -90,11 +85,6 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static struct map_list_node *map_list_node__new(void) -{ - return malloc(sizeof(struct map_list_node)); -} - static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); @@ -270,29 +260,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) curr->end = roundup(curr->start, 4096) + 4096; } -void maps__fixup_end(struct maps *maps) -{ - struct map_rb_node *prev = NULL, *curr; - - down_write(maps__lock(maps)); - - maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) - map__set_end(prev->map, map__start(curr->map)); - - prev = curr; - } - - /* - * We still haven't the actual symbols, so guess the - * last map final address. - */ - if (curr && !map__end(curr->map)) - map__set_end(curr->map, ~0ULL); - - up_write(maps__lock(maps)); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; @@ -1270,103 +1237,6 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } -/* - * Merges map into maps by splitting the new map within the existing map - * regions. - */ -int maps__merge_in(struct maps *kmaps, struct map *new_map) -{ - struct map_rb_node *rb_node; - LIST_HEAD(merged); - int err = 0; - - maps__for_each_entry(kmaps, rb_node) { - struct map *old_map = rb_node->map; - - /* no overload with this one */ - if (map__end(new_map) < map__start(old_map) || - map__start(new_map) >= map__end(old_map)) - continue; - - if (map__start(new_map) < map__start(old_map)) { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new......| -> |new..| - * |old....| -> |old....| - */ - map__set_end(new_map, map__start(old_map)); - } else { - /* - * |new.............| -> |new..| |new..| - * |old....| -> |old....| - */ - struct map_list_node *m = map_list_node__new(); - - if (!m) { - err = -ENOMEM; - goto out; - } - - m->map = map__clone(new_map); - if (!m->map) { - free(m); - err = -ENOMEM; - goto out; - } - - map__set_end(m->map, map__start(old_map)); - list_add_tail(&m->node, &merged); - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } else { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new..| -> x - * |old.........| -> |old.........| - */ - map__put(new_map); - new_map = NULL; - break; - } else { - /* - * |new......| -> |new...| - * |old....| -> |old....| - */ - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } - } - -out: - while (!list_empty(&merged)) { - struct map_list_node *old_node; - - old_node = list_entry(merged.next, struct map_list_node, node); - list_del_init(&old_node->node); - if (!err) - err = maps__insert(kmaps, old_node->map); - map__put(old_node->map); - free(old_node); - } - - if (new_map) { - if (!err) - err = maps__insert(kmaps, new_map); - map__put(new_map); - } - return err; -} - static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { @@ -2065,124 +1935,6 @@ out: return ret; } -static int map__strcmp(const void *a, const void *b) -{ - const struct map *map_a = *(const struct map **)a; - const struct map *map_b = *(const struct map **)b; - const struct dso *dso_a = map__dso(map_a); - const struct dso *dso_b = map__dso(map_b); - int ret = strcmp(dso_a->short_name, dso_b->short_name); - - if (ret == 0 && map_a != map_b) { - /* - * Ensure distinct but name equal maps have an order in part to - * aid reference counting. - */ - ret = (int)map__start(map_a) - (int)map__start(map_b); - if (ret == 0) - ret = (int)((intptr_t)map_a - (intptr_t)map_b); - } - - return ret; -} - -static int map__strcmp_name(const void *name, const void *b) -{ - const struct dso *dso = map__dso(*(const struct map **)b); - - return strcmp(name, dso->short_name); -} - -void __maps__sort_by_name(struct maps *maps) -{ - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); -} - -static int map__groups__sort_by_name_from_rbtree(struct maps *maps) -{ - struct map_rb_node *rb_node; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - maps__nr_maps(maps) * sizeof(struct map *)); - int i = 0; - - if (maps_by_name == NULL) - return -1; - - up_read(maps__lock(maps)); - down_write(maps__lock(maps)); - - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); - - maps__for_each_entry(maps, rb_node) - maps_by_name[i++] = map__get(rb_node->map); - - __maps__sort_by_name(maps); - - up_write(maps__lock(maps)); - down_read(maps__lock(maps)); - - return 0; -} - -static struct map *__maps__find_by_name(struct maps *maps, const char *name) -{ - struct map **mapp; - - if (maps__maps_by_name(maps) == NULL && - map__groups__sort_by_name_from_rbtree(maps)) - return NULL; - - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), - sizeof(*mapp), map__strcmp_name); - if (mapp) - return *mapp; - return NULL; -} - -struct map *maps__find_by_name(struct maps *maps, const char *name) -{ - struct map_rb_node *rb_node; - struct map *map; - - down_read(maps__lock(maps)); - - - if (RC_CHK_ACCESS(maps)->last_search_by_name) { - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); - - if (strcmp(dso->short_name, name) == 0) { - map = RC_CHK_ACCESS(maps)->last_search_by_name; - goto out_unlock; - } - } - /* - * If we have maps->maps_by_name, then the name isn't in the rbtree, - * as maps->maps_by_name mirrors the rbtree when lookups by name are - * made. - */ - map = __maps__find_by_name(maps, name); - if (map || maps__maps_by_name(maps) != NULL) - goto out_unlock; - - /* Fallback to traversing the rbtree... */ - maps__for_each_entry(maps, rb_node) { - struct dso *dso; - - map = rb_node->map; - dso = map__dso(map); - if (strcmp(dso->short_name, name) == 0) { - RC_CHK_ACCESS(maps)->last_search_by_name = map; - goto out_unlock; - } - } - map = NULL; - -out_unlock: - up_read(maps__lock(maps)); - return map; -} - int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af87c46b3f89..071837ddce2a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); -void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, -- cgit v1.2.3 From 01261d8a0f082b1a926d14ecb3ae05e52c477c74 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 27 Nov 2023 14:08:26 -0800 Subject: perf thread: Add missing RC_CHK_EQUAL Comparing pointers without RC_CHK_ACCESS means the indirect object will be compared rather than the underlying maps when REFCNT_CHECKING is enabled. Fix by adding missing RC_CHK_EQUAL. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Jajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231127220902.1315692-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index fe5e6991ae4b..b9c2039c4230 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -385,7 +385,7 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread__maps(thread) == thread__maps(parent)) { + if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread__pid(thread), thread__tid(thread), thread__pid(parent), thread__tid(parent)); -- cgit v1.2.3 From 4527358b76861dfd64ee34aba45d81648fbc8a61 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:15 -0800 Subject: bpf: introduce BPF token object Add new kind of BPF kernel object, BPF token. BPF token is meant to allow delegating privileged BPF functionality, like loading a BPF program or creating a BPF map, from privileged process to a *trusted* unprivileged process, all while having a good amount of control over which privileged operations could be performed using provided BPF token. This is achieved through mounting BPF FS instance with extra delegation mount options, which determine what operations are delegatable, and also constraining it to the owning user namespace (as mentioned in the previous patch). BPF token itself is just a derivative from BPF FS and can be created through a new bpf() syscall command, BPF_TOKEN_CREATE, which accepts BPF FS FD, which can be attained through open() API by opening BPF FS mount point. Currently, BPF token "inherits" delegated command, map types, prog type, and attach type bit sets from BPF FS as is. In the future, having an BPF token as a separate object with its own FD, we can allow to further restrict BPF token's allowable set of things either at the creation time or after the fact, allowing the process to guard itself further from unintentionally trying to load undesired kind of BPF programs. But for now we keep things simple and just copy bit sets as is. When BPF token is created from BPF FS mount, we take reference to the BPF super block's owning user namespace, and then use that namespace for checking all the {CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN} capabilities that are normally only checked against init userns (using capable()), but now we check them using ns_capable() instead (if BPF token is provided). See bpf_token_capable() for details. Such setup means that BPF token in itself is not sufficient to grant BPF functionality. User namespaced process has to *also* have necessary combination of capabilities inside that user namespace. So while previously CAP_BPF was useless when granted within user namespace, now it gains a meaning and allows container managers and sys admins to have a flexible control over which processes can and need to use BPF functionality within the user namespace (i.e., container in practice). And BPF FS delegation mount options and derived BPF tokens serve as a per-container "flag" to grant overall ability to use bpf() (plus further restrict on which parts of bpf() syscalls are treated as namespaced). Note also, BPF_TOKEN_CREATE command itself requires ns_capable(CAP_BPF) within the BPF FS owning user namespace, rounding up the ns_capable() story of BPF token. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 41 ++++++++ include/uapi/linux/bpf.h | 37 +++++++ kernel/bpf/Makefile | 2 +- kernel/bpf/inode.c | 12 ++- kernel/bpf/syscall.c | 17 ++++ kernel/bpf/token.c | 214 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 37 +++++++ 7 files changed, 354 insertions(+), 6 deletions(-) create mode 100644 kernel/bpf/token.c (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d3c9acc593ea..aa9cf8e5fab1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -51,6 +51,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1591,6 +1595,13 @@ struct bpf_mount_opts { u64 delegate_attachs; }; +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; +}; + struct bpf_struct_ops_value; struct btf_member; @@ -2048,6 +2059,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2182,6 +2194,8 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; +bool bpf_token_capable(const struct bpf_token *token, int cap); + static inline bool bpf_allow_ptr_leaks(void) { return perfmon_capable(); @@ -2216,8 +2230,17 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2580,6 +2603,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e88746ba7d21..d4a567e5bc3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1712,6 +1744,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f526b7573e97..4ce95acfcaa7 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 220fe0f99095..6ce3f9696e72 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -99,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -static struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -602,11 +602,13 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) { struct bpf_mount_opts *opts = root->d_sb->s_fs_info; umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; + u64 mask; if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - if (opts->delegate_cmds == ~0ULL) + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((opts->delegate_cmds & mask) == mask) seq_printf(m, ",delegate_cmds=any"); else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); @@ -639,7 +641,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -static const struct super_operations bpf_super_ops = { +const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ee33a52abf18..a156d549b356 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5377,6 +5377,20 @@ out_prog_put: return ret; } +#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd + +static int token_create(union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_TOKEN_CREATE)) + return -EINVAL; + + /* no flags are supported yet */ + if (attr->token_create.flags) + return -EINVAL; + + return bpf_token_create(attr); +} + static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5510,6 +5524,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; + case BPF_TOKEN_CREATE: + err = token_create(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c new file mode 100644 index 000000000000..e18aaecc67e9 --- /dev/null +++ b/kernel/bpf/token.c @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + /* BPF token allows ns_capable() level of capabilities, but only if + * token's userns is *exactly* the same as current user's userns + */ + if (token && current_user_ns() == token->userns) { + if (ns_capable(token->userns, cap)) + return true; + if (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN)) + return true; + } + /* otherwise fallback to capable() checks */ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +void bpf_token_inc(struct bpf_token *token) +{ + atomic64_inc(&token->refcnt); +} + +static void bpf_token_free(struct bpf_token *token) +{ + put_user_ns(token->userns); + kvfree(token); +} + +static void bpf_token_put_deferred(struct work_struct *work) +{ + struct bpf_token *token = container_of(work, struct bpf_token, work); + + bpf_token_free(token); +} + +void bpf_token_put(struct bpf_token *token) +{ + if (!token) + return; + + if (!atomic64_dec_and_test(&token->refcnt)) + return; + + INIT_WORK(&token->work, bpf_token_put_deferred); + schedule_work(&token->work); +} + +static int bpf_token_release(struct inode *inode, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + + bpf_token_put(token); + return 0; +} + +static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + u64 mask; + + BUILD_BUG_ON(__MAX_BPF_CMD >= 64); + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((token->allowed_cmds & mask) == mask) + seq_printf(m, "allowed_cmds:\tany\n"); + else + seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); +} + +#define BPF_TOKEN_INODE_NAME "bpf-token" + +static const struct inode_operations bpf_token_iops = { }; + +static const struct file_operations bpf_token_fops = { + .release = bpf_token_release, + .show_fdinfo = bpf_token_show_fdinfo, +}; + +int bpf_token_create(union bpf_attr *attr) +{ + struct bpf_mount_opts *mnt_opts; + struct bpf_token *token = NULL; + struct user_namespace *userns; + struct inode *inode; + struct file *file; + struct path path; + struct fd f; + umode_t mode; + int err, fd; + + f = fdget(attr->token_create.bpffs_fd); + if (!f.file) + return -EBADF; + + path = f.file->f_path; + path_get(&path); + fdput(f); + + if (path.dentry != path.mnt->mnt_sb->s_root) { + err = -EINVAL; + goto out_path; + } + if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { + err = -EINVAL; + goto out_path; + } + err = path_permission(&path, MAY_ACCESS); + if (err) + goto out_path; + + userns = path.dentry->d_sb->s_user_ns; + /* + * Enforce that creators of BPF tokens are in the same user + * namespace as the BPF FS instance. This makes reasoning about + * permissions a lot easier and we can always relax this later. + */ + if (current_user_ns() != userns) { + err = -EPERM; + goto out_path; + } + if (!ns_capable(userns, CAP_BPF)) { + err = -EPERM; + goto out_path; + } + + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_path; + } + + inode->i_op = &bpf_token_iops; + inode->i_fop = &bpf_token_fops; + clear_nlink(inode); /* make sure it is unlinked */ + + file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); + if (IS_ERR(file)) { + iput(inode); + err = PTR_ERR(file); + goto out_path; + } + + token = kvzalloc(sizeof(*token), GFP_USER); + if (!token) { + err = -ENOMEM; + goto out_file; + } + + atomic64_set(&token->refcnt, 1); + + /* remember bpffs owning userns for future ns_capable() checks */ + token->userns = get_user_ns(userns); + + mnt_opts = path.dentry->d_sb->s_fs_info; + token->allowed_cmds = mnt_opts->delegate_cmds; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + err = fd; + goto out_token; + } + + file->private_data = token; + fd_install(fd, file); + + path_put(&path); + return fd; + +out_token: + bpf_token_free(token); +out_file: + fput(file); +out_path: + path_put(&path); + return err; +} + +struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_token *token; + + if (!f.file) + return ERR_PTR(-EBADF); + if (f.file->f_op != &bpf_token_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + token = f.file->private_data; + bpf_token_inc(token); + fdput(f); + + return token; +} + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + /* BPF token can be used only within exactly the same userns in which + * it was created + */ + if (!token || current_user_ns() != token->userns) + return false; + + return token->allowed_cmds & (1ULL << cmd); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e88746ba7d21..d4a567e5bc3c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1712,6 +1744,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From 688b7270b3cb75e8ac78123d719967db40336e5b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:16 -0800 Subject: bpf: add BPF token support to BPF_MAP_CREATE command Allow providing token_fd for BPF_MAP_CREATE command to allow controlled BPF map creation from unprivileged process through delegated BPF token. Wire through a set of allowed BPF map types to BPF token, derived from BPF FS at BPF token creation time. This, in combination with allowed_cmds allows to create a narrowly-focused BPF token (controlled by privileged agent) with a restrictive set of BPF maps that application can attempt to create. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 2 + kernel/bpf/inode.c | 3 +- kernel/bpf/syscall.c | 52 ++++++++++++++++------ kernel/bpf/token.c | 16 +++++++ tools/include/uapi/linux/bpf.h | 2 + .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 ++ 8 files changed, 67 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aa9cf8e5fab1..e08e8436df38 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1600,6 +1600,7 @@ struct bpf_token { atomic64_t refcnt; struct user_namespace *userns; u64 allowed_cmds; + u64 allowed_maps; }; struct bpf_struct_ops_value; @@ -2236,6 +2237,7 @@ int bpf_token_create(union bpf_attr *attr); struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d4a567e5bc3c..0bba3392b17a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1433,6 +1434,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 6ce3f9696e72..9c7865d1c53d 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -613,7 +613,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); - if (opts->delegate_maps == ~0ULL) + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((opts->delegate_maps & mask) == mask) seq_printf(m, ",delegate_maps=any"); else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a156d549b356..22e14124cd61 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1009,8 +1009,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, const struct btf *btf, - u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, struct bpf_token *token, + const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1038,7 +1038,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_capable()) { + if (!bpf_token_capable(token, CAP_BPF)) { ret = -EPERM; goto free_map_tab; } @@ -1126,11 +1126,12 @@ static bool bpf_net_capable(void) return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); } -#define BPF_MAP_CREATE_LAST_FIELD map_extra +#define BPF_MAP_CREATE_LAST_FIELD map_token_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; + struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; struct bpf_map *map; @@ -1181,14 +1182,32 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; + if (attr->map_token_fd) { + token = bpf_token_get_from_fd(attr->map_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + + /* if current token doesn't grant map creation permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || + !bpf_token_allow_map_type(token, attr->map_type)) { + bpf_token_put(token); + token = NULL; + } + } + + err = -EPERM; + /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) + goto put_token; /* check privileged map type permissions */ switch (map_type) { @@ -1221,25 +1240,27 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_BPF)) + goto put_token; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_net_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; break; default: WARN(1, "unsupported map type %d", map_type); - return -EPERM; + goto put_token; } map = ops->map_alloc(attr); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto put_token; + } map->ops = ops; map->map_type = map_type; @@ -1276,7 +1297,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, btf, attr->btf_key_type_id, + err = map_check_btf(map, token, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1297,6 +1318,7 @@ static int map_create(union bpf_attr *attr) goto free_map_sec; bpf_map_save_memcg(map); + bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1317,6 +1339,8 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); +put_token: + bpf_token_put(token); return err; } diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index e18aaecc67e9..06c34dae658e 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -72,6 +72,13 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_cmds:\tany\n"); else seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); + + BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((token->allowed_maps & mask) == mask) + seq_printf(m, "allowed_maps:\tany\n"); + else + seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -161,6 +168,7 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; + token->allowed_maps = mnt_opts->delegate_maps; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -212,3 +220,11 @@ bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) return token->allowed_cmds & (1ULL << cmd); } + +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) +{ + if (!token || type >= __MAX_BPF_MAP_TYPE) + return false; + + return token->allowed_maps & (1ULL << type); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d4a567e5bc3c..0bba3392b17a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1433,6 +1434,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 9f766ddd946a..573249a2814d 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -68,6 +68,8 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; + if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) + continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index c440ea3311ed..2a0633f43c73 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; + if (map_type == __MAX_BPF_MAP_TYPE) + continue; + map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); -- cgit v1.2.3 From ee54b1a910e4d49c9a104f31ae3f5b979131adf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:17 -0800 Subject: bpf: add BPF token support to BPF_BTF_LOAD command Accept BPF token FD in BPF_BTF_LOAD command to allow BTF data loading through delegated BPF token. BTF loading is a pretty straightforward operation, so as long as BPF token is created with allow_cmds granting BPF_BTF_LOAD command, kernel proceeds to parsing BTF data and creating BTF object. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 20 ++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0bba3392b17a..9f9989e0d062 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1616,6 +1616,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 22e14124cd61..d87c5c27cde3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4777,15 +4777,31 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size +#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { + struct bpf_token *token = NULL; + if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (!bpf_capable()) + if (attr->btf_token_fd) { + token = bpf_token_get_from_fd(attr->btf_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { + bpf_token_put(token); + token = NULL; + } + } + + if (!bpf_token_capable(token, CAP_BPF)) { + bpf_token_put(token); return -EPERM; + } + + bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0bba3392b17a..9f9989e0d062 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1616,6 +1616,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { -- cgit v1.2.3 From e1cef620f598853a90f17701fcb1057a6768f7b8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:18 -0800 Subject: bpf: add BPF token support to BPF_PROG_LOAD command Add basic support of BPF token to BPF_PROG_LOAD. Wire through a set of allowed BPF program types and attach types, derived from BPF FS at BPF token creation time. Then make sure we perform bpf_token_capable() checks everywhere where it's relevant. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++ include/uapi/linux/bpf.h | 2 + kernel/bpf/core.c | 1 + kernel/bpf/inode.c | 6 +- kernel/bpf/syscall.c | 87 ++++++++++++++++------ kernel/bpf/token.c | 27 +++++++ tools/include/uapi/linux/bpf.h | 2 + .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 + 9 files changed, 110 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e08e8436df38..20af87b59d70 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1461,6 +1461,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1601,6 +1602,8 @@ struct bpf_token { struct user_namespace *userns; u64 allowed_cmds; u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; }; struct bpf_struct_ops_value; @@ -2238,6 +2241,9 @@ struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9f9989e0d062..4df2d025c784 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1504,6 +1505,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4b813da8d6c0..47085839af8d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2751,6 +2751,7 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); + bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 9c7865d1c53d..5359a0929c35 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -619,12 +619,14 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); - if (opts->delegate_progs == ~0ULL) + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((opts->delegate_progs & mask) == mask) seq_printf(m, ",delegate_progs=any"); else if (opts->delegate_progs) seq_printf(m, ",delegate_progs=0x%llx", opts->delegate_progs); - if (opts->delegate_attachs == ~0ULL) + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((opts->delegate_attachs & mask) == mask) seq_printf(m, ",delegate_attachs=any"); else if (opts->delegate_attachs) seq_printf(m, ",delegate_attachs=0x%llx", opts->delegate_attachs); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d87c5c27cde3..2c8393c21b8c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2608,13 +2608,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD log_true_size +#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; + struct bpf_token *token = NULL; + bool bpf_cap; int err; char license[128]; @@ -2631,10 +2633,31 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; + bpf_prog_load_fixup_attach_type(attr); + + if (attr->prog_token_fd) { + token = bpf_token_get_from_fd(attr->prog_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + /* if current token doesn't grant prog loading permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || + !bpf_token_allow_prog_type(token, attr->prog_type, + attr->expected_attach_type)) { + bpf_token_put(token); + token = NULL; + } + } + + bpf_cap = bpf_token_capable(token, CAP_BPF); + err = -EPERM; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2643,21 +2666,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_cap) + goto put_token; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) - return -E2BIG; + attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { + err = -E2BIG; + goto put_token; + } if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; - if (is_net_admin_prog_type(type) && !bpf_net_capable()) - return -EPERM; - if (is_perfmon_prog_type(type) && !perfmon_capable()) - return -EPERM; + if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; + if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) + goto put_token; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2667,27 +2692,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = -EINVAL; + goto put_token; + } if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - return -ENOTSUPP; + err = -ENOTSUPP; + goto put_token; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) - return PTR_ERR(attach_btf); - if (!attach_btf) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = PTR_ERR(attach_btf); + goto put_token; + } + if (!attach_btf) { + err = -EINVAL; + goto put_token; + } btf_get(attach_btf); } - bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2695,7 +2726,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -EINVAL; + err = -EINVAL; + goto put_token; } /* plain bpf_prog allocation */ @@ -2705,7 +2737,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -ENOMEM; + err = -EINVAL; + goto put_token; } prog->expected_attach_type = attr->expected_attach_type; @@ -2716,6 +2749,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; + /* move token into prog->aux, reuse taken refcnt */ + prog->aux->token = token; + token = NULL; + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog; @@ -2817,6 +2854,8 @@ free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); +put_token: + bpf_token_put(token); return err; } @@ -3806,7 +3845,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_net_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. */ diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index 06c34dae658e..5a51e6b8f6bf 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -79,6 +79,20 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_maps:\tany\n"); else seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); + + BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((token->allowed_progs & mask) == mask) + seq_printf(m, "allowed_progs:\tany\n"); + else + seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); + + BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((token->allowed_attachs & mask) == mask) + seq_printf(m, "allowed_attachs:\tany\n"); + else + seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -169,6 +183,8 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_maps = mnt_opts->delegate_maps; + token->allowed_progs = mnt_opts->delegate_progs; + token->allowed_attachs = mnt_opts->delegate_attachs; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -228,3 +244,14 @@ bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type t return token->allowed_maps & (1ULL << type); } + +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type) +{ + if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) + return false; + + return (token->allowed_progs & (1ULL << prog_type)) && + (token->allowed_attachs & (1ULL << attach_type)); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9f9989e0d062..4df2d025c784 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1504,6 +1505,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 573249a2814d..4ed46ed58a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; + if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) + continue; if (!test__start_subtest(prog_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 2a0633f43c73..384bc1f7a65e 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -189,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; + if (prog_type == __MAX_BPF_PROG_TYPE) + continue; + prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); -- cgit v1.2.3 From ecd435143eb03611e25694141bf59d1c04ad5b9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:24 -0800 Subject: libbpf: add bpf_token_create() API Add low-level wrapper API for BPF_TOKEN_CREATE command in bpf() syscall. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 17 +++++++++++++++++ tools/lib/bpf/bpf.h | 24 ++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..d4019928a864 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1287,3 +1287,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..e49254c9f68f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -640,6 +640,30 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. + * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 91c5aef7dae7..df7657b65c47 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,6 +401,7 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; -- cgit v1.2.3 From 37891cea6699200fb83eae464ebe1c0f73040474 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:25 -0800 Subject: libbpf: add BPF token support to bpf_map_create() API Add ability to provide token_fd for BPF_MAP_CREATE command through bpf_map_create() API. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d4019928a864..1653b64b7015 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -169,7 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); union bpf_attr attr; int fd; @@ -198,6 +198,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index e49254c9f68f..ae2136f596b4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,11 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + + __u32 token_fd; + size_t :0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field token_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- cgit v1.2.3 From 1a8df7fa00aac35aff9ef1941c5334b3a01d09e4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:26 -0800 Subject: libbpf: add BPF token support to bpf_btf_load() API Allow user to specify token_fd for bpf_btf_load() API that wraps kernel's BPF_BTF_LOAD command. This allows loading BTF from unprivileged process as long as it has BPF token allowing BPF_BTF_LOAD command, which can be created and delegated by privileged process. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-15-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1653b64b7015..544ae2376b6b 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1184,7 +1184,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1209,6 +1209,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ae2136f596b4..4b0f25e97b0d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -133,9 +133,10 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field log_true_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); -- cgit v1.2.3 From 1571740a9ba036f26cc5211a86021199987219e8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:27 -0800 Subject: libbpf: add BPF token support to bpf_prog_load() API Wire through token_fd into bpf_prog_load(). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-16-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 544ae2376b6b..f4e1da3c6d5f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -234,7 +234,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -263,6 +263,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4b0f25e97b0d..991b86bfe7e4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -105,9 +105,10 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field log_true_size +#define bpf_prog_load_opts__last_field token_fd LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, -- cgit v1.2.3 From dc5196fac40c2cb96330bcb98eef868a7fd225b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:28 -0800 Subject: selftests/bpf: add BPF token-enabled tests Add a selftest that attempts to conceptually replicate intended BPF token use cases inside user namespaced container. Child process is forked. It is then put into its own userns and mountns. Child creates BPF FS context object. This ensures child userns is captured as the owning userns for this instance of BPF FS. Given setting delegation mount options is privileged operation, we ensure that child cannot set them. This context is passed back to privileged parent process through Unix socket, where parent sets up delegation options, creates, and mounts it as a detached mount. This mount FD is passed back to the child to be used for BPF token creation, which allows otherwise privileged BPF operations to succeed inside userns. We validate that all of token-enabled privileged commands (BPF_BTF_LOAD, BPF_MAP_CREATE, and BPF_PROG_LOAD) work as intended. They should only succeed inside the userns if a) BPF token is provided with proper allowed sets of commands and types; and b) namespaces CAP_BPF and other privileges are set. Lacking a) or b) should lead to -EPERM failures. Based on suggested workflow by Christian Brauner ([0]). [0] https://lore.kernel.org/bpf/20230704-hochverdient-lehne-eeb9eeef785e@brauner/ Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-17-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 672 +++++++++++++++++++++++++ 1 file changed, 672 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/token.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c new file mode 100644 index 000000000000..dc03790c6272 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include +#include +#include "cap_helpers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int sys_mount(const char *dev_name, const char *dir_name, + const char *type, unsigned long flags, + const void *data) +{ + return syscall(__NR_mount, dev_name, dir_name, type, flags, data); +} + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fspick(int dfd, const char *path, unsigned flags) +{ + return syscall(__NR_fspick, dfd, path, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +static int drop_priv_caps(__u64 *old_caps) +{ + return cap_disable_effective((1ULL << CAP_BPF) | + (1ULL << CAP_PERFMON) | + (1ULL << CAP_NET_ADMIN) | + (1ULL << CAP_SYS_ADMIN), old_caps); +} + +static int restore_priv_caps(__u64 old_caps) +{ + return cap_enable_effective(old_caps, NULL); +} + +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +{ + char buf[32]; + int err; + + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, + mask == ~0ULL ? "any" : buf, 0); + if (err < 0) + err = -errno; + return err; +} + +#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) + +struct bpffs_opts { + __u64 cmds; + __u64 maps; + __u64 progs; + __u64 attachs; +}; + +static int create_bpffs_fd(void) +{ + int fs_fd; + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + ASSERT_GE(fs_fd, 0, "fs_fd"); + + return fs_fd; +} + +static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) +{ + int mnt_fd, err; + + /* set up token delegation mount options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + if (!ASSERT_OK(err, "fs_cfg_cmds")) + return err; + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + if (!ASSERT_OK(err, "fs_cfg_maps")) + return err; + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + if (!ASSERT_OK(err, "fs_cfg_progs")) + return err; + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + if (!ASSERT_OK(err, "fs_cfg_attachs")) + return err; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (err < 0) + return -errno; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (err < 0) + return -errno; + + return mnt_fd; +} + +/* send FD over Unix domain (AF_UNIX) socket */ +static int sendfd(int sockfd, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1] = { fd }, err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + + err = sendmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "sendmsg")) + return -EINVAL; + + return 0; +} + +/* receive FD over Unix domain (AF_UNIX) socket */ +static int recvfd(int sockfd, int *fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1], err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + err = recvmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "recvmsg")) + return -EINVAL; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || + !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || + !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || + !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) + return -EINVAL; + + memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); + *fd = fds[0]; + + return 0; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +typedef int (*child_callback_fn)(int); + +static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; + + /* setup userns with root mappings */ + err = create_and_enter_userns(); + if (!ASSERT_OK(err, "create_and_enter_userns")) + goto cleanup; + + /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "create_mountns")) + goto cleanup; + + err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (!ASSERT_OK(err, "remount_root")) + goto cleanup; + + fs_fd = create_bpffs_fd(); + if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, fs_fd); + if (!ASSERT_OK(err, "send_fs_fd")) + goto cleanup; + zclose(fs_fd); + + /* avoid mucking around with mount namespaces and mounting at + * well-known path, just get detach-mounted BPF FS fd back from parent + */ + err = recvfd(sock_fd, &mnt_fd); + if (!ASSERT_OK(err, "recv_mnt_fd")) + goto cleanup; + + /* try to fspick() BPF FS and try to add some delegation options */ + fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); + if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot reconfigure to set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); + if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { + err = -EINVAL; + goto cleanup; + } + + /* do custom test logic with customly set up BPF FS instance */ + err = callback(bpffs_fd); + if (!ASSERT_OK(err, "test_callback")) + goto cleanup; + + err = 0; +cleanup: + zclose(sock_fd); + zclose(mnt_fd); + zclose(fs_fd); + zclose(bpffs_fd); + + exit(-err); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) +{ + int fs_fd = -1, mnt_fd = -1, err; + + err = recvfd(sock_fd, &fs_fd); + if (!ASSERT_OK(err, "recv_bpffs_fd")) + goto cleanup; + + mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, mnt_fd); + if (!ASSERT_OK(err, "send_mnt_fd")) + goto cleanup; + zclose(mnt_fd); + + err = wait_for_pid(child_pid); + ASSERT_OK(err, "waitpid_child"); + +cleanup: + zclose(sock_fd); + zclose(fs_fd); + zclose(mnt_fd); + + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) +{ + int sock_fds[2] = { -1, -1 }; + int child_pid = 0, err; + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); + if (!ASSERT_OK(err, "socketpair")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GE(child_pid, 0, "fork")) + goto cleanup; + + if (child_pid == 0) { + zclose(sock_fds[0]); + return child(sock_fds[1], bpffs_opts, cb); + + } else { + zclose(sock_fds[1]); + return parent(child_pid, bpffs_opts, sock_fds[0]); + } + +cleanup: + zclose(sock_fds[0]); + zclose(sock_fds[1]); + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static int userns_map_create(int mnt_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int err, token_fd = -1, map_fd = -1; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no token, no CAP_BPF -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* token without CAP_BPF -> fail */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* CAP_BPF without token -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* finally, namespaced CAP_BPF + token -> success */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { + err = -EINVAL; + goto cleanup; + } + +cleanup: + zclose(token_fd); + zclose(map_fd); + return err; +} + +static int userns_btf_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); + int err, token_fd = -1, btf_fd = -1; + const void *raw_btf_data; + struct btf *btf = NULL; + __u32 raw_btf_size; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* setup a trivial BTF data to load to the kernel */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + goto cleanup; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) + goto cleanup; + + /* no token + no CAP_BPF -> failure */ + btf_opts.token_fd = 0; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) + goto cleanup; + + /* token + no CAP_BPF -> failure */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) + goto cleanup; + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* token + CAP_BPF -> success */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) + goto cleanup; + + err = 0; +cleanup: + btf__free(btf); + zclose(btf_fd); + zclose(token_fd); + return err; +} + +static int userns_prog_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + int err, token_fd = -1, prog_fd = -1; + struct bpf_insn insns[] = { + /* bpf_jiffies64() requires CAP_BPF */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + /* bpf_get_current_task() requires CAP_PERFMON */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), + /* r0 = 0; exit; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* validate we can successfully load BPF program with token; this + * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) + * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have + * BPF token wired properly in a bunch of places in the kernel + */ + prog_opts.token_fd = token_fd; + prog_opts.expected_attach_type = BPF_XDP; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { + err = -EPERM; + goto cleanup; + } + + /* no token + caps -> failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no caps + token -> failure */ + prog_opts.token_fd = token_fd; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + /* no caps + no token -> definitely a failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = 0; +cleanup: + zclose(prog_fd); + zclose(token_fd); + return err; +} + +void test_token(void) +{ + if (test__start_subtest("map_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_MAP_CREATE, + .maps = 1ULL << BPF_MAP_TYPE_STACK, + }; + + subtest_userns(&opts, userns_map_create); + } + if (test__start_subtest("btf_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_BTF_LOAD, + }; + + subtest_userns(&opts, userns_btf_load); + } + if (test__start_subtest("prog_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_PROG_LOAD, + .progs = 1ULL << BPF_PROG_TYPE_XDP, + .attachs = 1ULL << BPF_XDP, + }; + + subtest_userns(&opts, userns_prog_load); + } +} -- cgit v1.2.3 From 0713ab3bd169da82c35eefd012b07b715e4ebcf7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 10:35:33 -0800 Subject: perf stat: Exit perf stat if parse groups fails Metrics were added by a callback but commit a4b8cfcabb1d90ec ("perf stat: Delay metric parsing") postponed this to allow optimizations based on the CPU configuration. In doing so it stopped errors in metric parsing from causing 'perf stat' termination. This change adds the termination for bad metric names back in. Fixes: a4b8cfcabb1d90ec ("perf stat: Delay metric parsing") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Closes: https://lore.kernel.org/lkml/ZXByT1K6enTh2EHT@kernel.org/ Link: https://lore.kernel.org/r/20231206183533.972028-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d8e5d6f7a87a..d22228eddccb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2695,15 +2695,19 @@ int cmd_stat(int argc, const char **argv) */ if (metrics) { const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + int ret = metricgroup__parse_groups(evsel_list, pmu, metrics, + stat_config.metric_no_group, + stat_config.metric_no_merge, + stat_config.metric_no_threshold, + stat_config.user_requested_cpu_list, + stat_config.system_wide, + &stat_config.metric_events); - metricgroup__parse_groups(evsel_list, pmu, metrics, - stat_config.metric_no_group, - stat_config.metric_no_merge, - stat_config.metric_no_threshold, - stat_config.user_requested_cpu_list, - stat_config.system_wide, - &stat_config.metric_events); zfree(&metrics); + if (ret) { + status = ret; + goto out; + } } if (add_default_attributes()) -- cgit v1.2.3 From ffed24eff9e0e52d8e74df1c18db8ed43b4666e6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2023 09:30:41 +0100 Subject: selftests/bpf: Add test for early update in prog_array_map_poke_run Adding test that tries to trigger the BUG_IN during early map update in prog_array_map_poke_run function. The idea is to share prog array map between thread that constantly updates it and another one loading a program that uses that prog array. Eventually we will hit a place where the program is ok to be updated (poke->tailcall_target_stable check) but the address is still not registered in kallsyms, so the bpf_arch_text_poke returns -EINVAL and cause imbalance for the next tail call update check, which will fail with -EBUSY in bpf_arch_text_poke as described in previous fix. Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20231206083041.1306660-3-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/tailcalls.c | 84 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/tailcall_poke.c | 32 +++++++++ 2 files changed, 116 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_poke.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index fc6b2954e8f5..59993fc9c0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include +#include "tailcall_poke.skel.h" + /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1105,6 +1108,85 @@ out: bpf_object__close(tgt_obj); } +#define JMP_TABLE "/sys/fs/bpf/jmp_table" + +static int poke_thread_exit; + +static void *poke_update(void *arg) +{ + __u32 zero = 0, prog1_fd, prog2_fd, map_fd; + struct tailcall_poke *call = arg; + + map_fd = bpf_map__fd(call->maps.jmp_table); + prog1_fd = bpf_program__fd(call->progs.call1); + prog2_fd = bpf_program__fd(call->progs.call2); + + while (!poke_thread_exit) { + bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY); + bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY); + } + + return NULL; +} + +/* + * We are trying to hit prog array update during another program load + * that shares the same prog array map. + * + * For that we share the jmp_table map between two skeleton instances + * by pinning the jmp_table to same path. Then first skeleton instance + * periodically updates jmp_table in 'poke update' thread while we load + * the second skeleton instance in the main thread. + */ +static void test_tailcall_poke(void) +{ + struct tailcall_poke *call, *test; + int err, cnt = 10; + pthread_t thread; + + unlink(JMP_TABLE); + + call = tailcall_poke__open_and_load(); + if (!ASSERT_OK_PTR(call, "tailcall_poke__open")) + return; + + err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) + goto out; + + err = pthread_create(&thread, NULL, poke_update, call); + if (!ASSERT_OK(err, "new toggler")) + goto out; + + while (cnt--) { + test = tailcall_poke__open(); + if (!ASSERT_OK_PTR(test, "tailcall_poke__open")) + break; + + err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE); + if (!ASSERT_OK(err, "bpf_map__pin")) { + tailcall_poke__destroy(test); + break; + } + + bpf_program__set_autoload(test->progs.test, true); + bpf_program__set_autoload(test->progs.call1, false); + bpf_program__set_autoload(test->progs.call2, false); + + err = tailcall_poke__load(test); + tailcall_poke__destroy(test); + if (!ASSERT_OK(err, "tailcall_poke__load")) + break; + } + + poke_thread_exit = 1; + ASSERT_OK(pthread_join(thread, NULL), "pthread_join"); + +out: + bpf_map__unpin(call->maps.jmp_table, JMP_TABLE); + tailcall_poke__destroy(call); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -1139,4 +1221,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_fentry_fexit(); if (test__start_subtest("tailcall_bpf2bpf_fentry_entry")) test_tailcall_bpf2bpf_fentry_entry(); + if (test__start_subtest("tailcall_poke")) + test_tailcall_poke(); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c new file mode 100644 index 000000000000..c78b94b75e83 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?fentry/bpf_fentry_test1") +int BPF_PROG(test, int a) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call1, int a) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(call2, int a) +{ + return 0; +} -- cgit v1.2.3 From 7065eefb38f16c91e9ace36fb7c873e4c9857c27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Dec 2023 11:09:20 -0800 Subject: bpf: rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE for consistency To stay consistent with the naming pattern used for similar cases in BPF UAPI (__MAX_BPF_ATTACH_TYPE, etc), rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE. Also similar to MAX_BPF_ATTACH_TYPE and MAX_BPF_REG, add: #define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE Not all __MAX_xxx enums have such #define, so I'm not sure if we should add it or not, but I figured I'll start with a completely backwards compatible way, and we can drop that, if necessary. Also adjust a selftest that used MAX_BPF_LINK_TYPE enum. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206190920.1651226-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- tools/testing/selftests/bpf/prog_tests/libbpf_str.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4df2d025c784..e0545201b55f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4df2d025c784..e0545201b55f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1108,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 384bc1f7a65e..62ea855ec4d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -87,7 +87,7 @@ static void test_libbpf_bpf_link_type_str(void) const char *link_type_str; char buf[256]; - if (link_type == MAX_BPF_LINK_TYPE) + if (link_type == __MAX_BPF_LINK_TYPE) continue; link_type_name = btf__str_by_offset(btf, e->name_off); -- cgit v1.2.3 From 3f3cac5c0a5c3b09bdfb919a7518dca83523a52c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 16 Nov 2023 15:15:47 -0500 Subject: mm/selftests: fix pagemap_ioctl memory map test __FILE__ is not guaranteed to exist in current dir. Replace that with argv[0] for memory map test. Link: https://lkml.kernel.org/r/20231116201547.536857-4-peterx@redhat.com Fixes: 46fd75d4a3c9 ("selftests: mm: add pagemap ioctl tests") Signed-off-by: Peter Xu Reviewed-by: David Hildenbrand Cc: Andrei Vagin Cc: David Hildenbrand Cc: Muhammad Usama Anjum Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/pagemap_ioctl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index befab43719ba..d59517ed3d48 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -36,6 +36,7 @@ int pagemap_fd; int uffd; int page_size; int hpage_size; +const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -1149,11 +1150,11 @@ int sanity_tests(void) munmap(mem, mem_size); /* 9. Memory mapped file */ - fd = open(__FILE__, O_RDONLY); + fd = open(progname, O_RDONLY); if (fd < 0) ksft_exit_fail_msg("%s Memory mapped file\n", __func__); - ret = stat(__FILE__, &sbuf); + ret = stat(progname, &sbuf); if (ret < 0) ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); @@ -1472,12 +1473,14 @@ static void transact_test(int page_size) extra_thread_faults); } -int main(void) +int main(int argc, char *argv[]) { int mem_size, shmid, buf_size, fd, i, ret; char *mem, *map, *fmem; struct stat sbuf; + progname = argv[0]; + ksft_print_header(); if (init_uffd()) -- cgit v1.2.3 From f39fb633fe9b9a4a7572ec32debf766d971a500b Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Mon, 20 Nov 2023 15:29:08 -0700 Subject: selftests/mm: prevent duplicate runs caused by TEST_GEN_PROGS Commit 05f1edac8009 ("selftests/mm: run all tests from run_vmtests.sh") fixed the inconsistency caused by tests being defined as TEST_GEN_PROGS. This issue was leading to tests not being executed via run_vmtests.sh and furthermore some tests running twice due to the kselftests wrapper also executing them. Fix the definition of two tests (soft-dirty and pagemap_ioctl) that are still incorrectly defined. Link: https://lkml.kernel.org/r/20231120222908.28559-1-npache@redhat.com Signed-off-by: Nico Pache Reviewed-by: David Hildenbrand Cc: Joel Savitz Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 78dfec8bc676..dede0bcf97a3 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -60,7 +60,7 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit -TEST_GEN_PROGS += pagemap_ioctl +TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -72,7 +72,7 @@ TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv ifneq ($(ARCH),arm64) -TEST_GEN_PROGS += soft-dirty +TEST_GEN_FILES += soft-dirty endif ifeq ($(ARCH),x86_64) -- cgit v1.2.3 From 887f8094b3352127d1bc68f768774e97acf4e7fa Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:52 +0100 Subject: selftests/hid: vmtest.sh: update vm2c and container boot2container is now on an official project, so let's use that. The container image is now the same I use for the CI, so let's keep to it. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-1-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/vmtest.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index 4da48bf6b328..301b4e162336 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -19,12 +19,12 @@ esac SCRIPT_DIR="$(dirname $(realpath $0))" OUTPUT_DIR="$SCRIPT_DIR/results" KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}") -B2C_URL="https://gitlab.freedesktop.org/mupuf/boot2container/-/raw/master/vm2c.py" +B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")" LOG_FILE="${LOG_FILE_BASE}.log" EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status" -CONTAINER_IMAGE="registry.freedesktop.org/libevdev/hid-tools/fedora/37:2023-02-17.1" +CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1" TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}" DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests" -- cgit v1.2.3 From 46bc0277c2507338d98e166826afec330962309e Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:53 +0100 Subject: selftests/hid: vmtest.sh: allow finer control on the build steps vmtest.sh works great for a one shot test, but not so much for CI where I want to build (with different configs) the bzImage in a separate job than the one I am running it. Add a "build_only" option to specify whether we need to boot the currently built kernel in the vm. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-2-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/vmtest.sh | 42 ++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index 301b4e162336..db534e9099a8 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -32,7 +32,7 @@ DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS usage() { cat <] -- [] +Usage: $0 [-j N] [-s] [-b] [-d ] -- [] is the command you would normally run when you are in the source kernel direcory. e.g: @@ -55,6 +55,7 @@ Options: -u) Update the boot2container script to a newer version. -d) Update the output directory (default: ${OUTPUT_DIR}) + -b) Run only the build steps for the kernel and the selftests -j) Number of jobs for compilation, similar to -j in make (default: ${NUM_COMPILE_JOBS}) -s) Instead of powering off the VM, start an interactive @@ -191,8 +192,9 @@ main() local command="${DEFAULT_COMMAND}" local update_b2c="no" local debug_shell="no" + local build_only="no" - while getopts ':hsud:j:' opt; do + while getopts ':hsud:j:b' opt; do case ${opt} in u) update_b2c="yes" @@ -207,6 +209,9 @@ main() command="/bin/sh" debug_shell="yes" ;; + b) + build_only="yes" + ;; h) usage exit 0 @@ -226,8 +231,7 @@ main() shift $((OPTIND -1)) # trap 'catch "$?"' EXIT - - if [[ "${debug_shell}" == "no" ]]; then + if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then if [[ $# -eq 0 ]]; then echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" else @@ -267,24 +271,26 @@ main() update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" + update_selftests "${kernel_checkout}" "${make_command}" - if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then - echo "vm2c script not found in ${b2c}" - update_b2c="yes" - fi + if [[ "${build_only}" == "no" ]]; then + if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then + echo "vm2c script not found in ${b2c}" + update_b2c="yes" + fi - if [[ "${update_b2c}" == "yes" ]]; then - download $B2C_URL $b2c - chmod +x $b2c - fi + if [[ "${update_b2c}" == "yes" ]]; then + download $B2C_URL $b2c + chmod +x $b2c + fi - update_selftests "${kernel_checkout}" "${make_command}" - run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}" - if [[ "${debug_shell}" != "yes" ]]; then - echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" - fi + run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}" + if [[ "${debug_shell}" != "yes" ]]; then + echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" + fi - exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) + exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) + fi } main "$@" -- cgit v1.2.3 From 110292a77f7c8d11eaa472e65f6a2084b25be2db Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:54 +0100 Subject: selftests/hid: base: allow for multiple skip_if_uhdev We can actually have multiple occurences of `skip_if_uhdev` if we follow the information from the pytest doc[0]. This is not immediately used, but can be if we need multiple conditions on a given test. [0] https://docs.pytest.org/en/latest/historical-notes.html#update-marker-code Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-3-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 1305cfc9646e..5d9c26dfc460 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -238,8 +238,7 @@ class BaseTestCase: try: with HIDTestUdevRule.instance(): with new_uhdev as self.uhdev: - skip_cond = request.node.get_closest_marker("skip_if_uhdev") - if skip_cond: + for skip_cond in request.node.iter_markers("skip_if_uhdev"): test, message, *rest = skip_cond.args if test(self.uhdev): -- cgit v1.2.3 From b5edacf79c8e1990200f9392e6354583298e8765 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:55 +0100 Subject: selftests/hid: tablets: remove unused class Looks like this is a leftover Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-4-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 303ffff9ee8d..cd9c1269afa6 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -133,10 +133,6 @@ class PenState(Enum): return tuple() -class Data(object): - pass - - class Pen(object): def __init__(self, x, y): self.x = x -- cgit v1.2.3 From d52f52069fed639113b1014cde5eff8902d3064d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:56 +0100 Subject: selftests/hid: tablets: move the transitions to PenState Those transitions have nothing to do with `Pen`, so migrate them to `PenState`. The hidden agenda is to remove `Pen` and integrate it into `PenDigitizer` so that we can tweak the events in each state to emulate firmware bugs. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-5-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 215 ++++++++++++----------- 1 file changed, 109 insertions(+), 106 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index cd9c1269afa6..ddf28c245046 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -132,104 +132,8 @@ class PenState(Enum): return tuple() - -class Pen(object): - def __init__(self, x, y): - self.x = x - self.y = y - self.tipswitch = False - self.tippressure = 15 - self.azimuth = 0 - self.inrange = False - self.width = 10 - self.height = 10 - self.barrelswitch = False - self.invert = False - self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 - self._old_values = None - self.current_state = None - - def _restore(self): - if self._old_values is not None: - for i in [ - "x", - "y", - "tippressure", - "azimuth", - "width", - "height", - "twist", - "x_tilt", - "y_tilt", - ]: - setattr(self, i, getattr(self._old_values, i)) - - def move_to(self, state): - # fill in the previous values - if self.current_state == PenState.PEN_IS_OUT_OF_RANGE: - self._restore() - - print(f"\n *** pen is moving to {state} ***") - - if state == PenState.PEN_IS_OUT_OF_RANGE: - self._old_values = copy.copy(self) - self.x = 0 - self.y = 0 - self.tipswitch = False - self.tippressure = 0 - self.azimuth = 0 - self.inrange = False - self.width = 0 - self.height = 0 - self.invert = False - self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 - elif state == PenState.PEN_IS_IN_RANGE: - self.tipswitch = False - self.inrange = True - self.invert = False - self.eraser = False - elif state == PenState.PEN_IS_IN_CONTACT: - self.tipswitch = True - self.inrange = True - self.invert = False - self.eraser = False - elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = False - elif state == PenState.PEN_IS_ERASING: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = True - - self.current_state = state - - def __assert_axis(self, evdev, axis, value): - if ( - axis == libevdev.EV_KEY.BTN_TOOL_RUBBER - and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None - ): - return - - assert ( - evdev.value[axis] == value - ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}" - - def assert_expected_input_events(self, evdev): - assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x - assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y - assert self.current_state == PenState.from_evdev(evdev) - @staticmethod - def legal_transitions() -> Dict[str, Tuple[PenState, ...]]: + def legal_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is the first half of the Windows Pen Implementation state machine: we don't have Invert nor Erase bits, so just move in/out-of-range or proximity. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states @@ -255,7 +159,7 @@ class Pen(object): } @staticmethod - def legal_transitions_with_invert() -> Dict[str, Tuple[PenState, ...]]: + def legal_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]: """This is the second half of the Windows Pen Implementation state machine: we now have Invert and Erase bits, so move in/out or proximity with the intend to erase. @@ -293,7 +197,7 @@ class Pen(object): } @staticmethod - def tolerated_transitions() -> Dict[str, Tuple[PenState, ...]]: + def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is not adhering to the Windows Pen Implementation state machine but we should expect the kernel to behave properly, mostly for historical reasons.""" @@ -306,7 +210,7 @@ class Pen(object): } @staticmethod - def tolerated_transitions_with_invert() -> Dict[str, Tuple[PenState, ...]]: + def tolerated_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]: """This is the second half of the Windows Pen Implementation state machine: we now have Invert and Erase bits, so move in/out or proximity with the intend to erase. @@ -321,7 +225,7 @@ class Pen(object): } @staticmethod - def broken_transitions() -> Dict[str, Tuple[PenState, ...]]: + def broken_transitions() -> Dict[str, Tuple["PenState", ...]]: """Those tests are definitely not part of the Windows specification. However, a half broken device might export those transitions. For example, a pen that has the eraser button might wobble between @@ -359,6 +263,102 @@ class Pen(object): } +class Pen(object): + def __init__(self, x, y): + self.x = x + self.y = y + self.tipswitch = False + self.tippressure = 15 + self.azimuth = 0 + self.inrange = False + self.width = 10 + self.height = 10 + self.barrelswitch = False + self.invert = False + self.eraser = False + self.x_tilt = 0 + self.y_tilt = 0 + self.twist = 0 + self._old_values = None + self.current_state = None + + def _restore(self): + if self._old_values is not None: + for i in [ + "x", + "y", + "tippressure", + "azimuth", + "width", + "height", + "twist", + "x_tilt", + "y_tilt", + ]: + setattr(self, i, getattr(self._old_values, i)) + + def move_to(self, state): + # fill in the previous values + if self.current_state == PenState.PEN_IS_OUT_OF_RANGE: + self._restore() + + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + self._old_values = copy.copy(self) + self.x = 0 + self.y = 0 + self.tipswitch = False + self.tippressure = 0 + self.azimuth = 0 + self.inrange = False + self.width = 0 + self.height = 0 + self.invert = False + self.eraser = False + self.x_tilt = 0 + self.y_tilt = 0 + self.twist = 0 + elif state == PenState.PEN_IS_IN_RANGE: + self.tipswitch = False + self.inrange = True + self.invert = False + self.eraser = False + elif state == PenState.PEN_IS_IN_CONTACT: + self.tipswitch = True + self.inrange = True + self.invert = False + self.eraser = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + self.tipswitch = False + self.inrange = True + self.invert = True + self.eraser = False + elif state == PenState.PEN_IS_ERASING: + self.tipswitch = False + self.inrange = True + self.invert = True + self.eraser = True + + self.current_state = state + + def __assert_axis(self, evdev, axis, value): + if ( + axis == libevdev.EV_KEY.BTN_TOOL_RUBBER + and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None + ): + return + + assert ( + evdev.value[axis] == value + ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}" + + def assert_expected_input_events(self, evdev): + assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x + assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y + assert self.current_state == PenState.from_evdev(evdev) + + class PenDigitizer(base.UHIDTestDevice): def __init__( self, @@ -486,7 +486,7 @@ class BaseTest: @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.legal_transitions().items()], + [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()], ) def test_valid_pen_states(self, state_list, scribble): """This is the first half of the Windows Pen Implementation state machine: @@ -498,7 +498,10 @@ class BaseTest: @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.tolerated_transitions().items()], + [ + pytest.param(v, id=k) + for k, v in PenState.tolerated_transitions().items() + ], ) def test_tolerated_pen_states(self, state_list, scribble): """This is not adhering to the Windows Pen Implementation state machine @@ -515,7 +518,7 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in Pen.legal_transitions_with_invert().items() + for k, v in PenState.legal_transitions_with_invert().items() ], ) def test_valid_invert_pen_states(self, state_list, scribble): @@ -535,7 +538,7 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in Pen.tolerated_transitions_with_invert().items() + for k, v in PenState.tolerated_transitions_with_invert().items() ], ) def test_tolerated_invert_pen_states(self, state_list, scribble): @@ -553,7 +556,7 @@ class BaseTest: @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.broken_transitions().items()], + [pytest.param(v, id=k) for k, v in PenState.broken_transitions().items()], ) def test_tolerated_broken_pen_states(self, state_list, scribble): """Those tests are definitely not part of the Windows specification. -- cgit v1.2.3 From 881ccc36b42670ebbd5fb32a79b239ce1136e84d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:57 +0100 Subject: selftests/hid: tablets: move move_to function to PenDigitizer We can easily subclass PenDigitizer for introducing firmware bugs when subclassing Pen is harder. Move move_to from Pen to PenDigitizer so we get that ability Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-6-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 97 ++++++++++++------------ 1 file changed, 50 insertions(+), 47 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index ddf28c245046..27260dc02cc4 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -282,7 +282,7 @@ class Pen(object): self._old_values = None self.current_state = None - def _restore(self): + def restore(self): if self._old_values is not None: for i in [ "x", @@ -297,50 +297,8 @@ class Pen(object): ]: setattr(self, i, getattr(self._old_values, i)) - def move_to(self, state): - # fill in the previous values - if self.current_state == PenState.PEN_IS_OUT_OF_RANGE: - self._restore() - - print(f"\n *** pen is moving to {state} ***") - - if state == PenState.PEN_IS_OUT_OF_RANGE: - self._old_values = copy.copy(self) - self.x = 0 - self.y = 0 - self.tipswitch = False - self.tippressure = 0 - self.azimuth = 0 - self.inrange = False - self.width = 0 - self.height = 0 - self.invert = False - self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 - elif state == PenState.PEN_IS_IN_RANGE: - self.tipswitch = False - self.inrange = True - self.invert = False - self.eraser = False - elif state == PenState.PEN_IS_IN_CONTACT: - self.tipswitch = True - self.inrange = True - self.invert = False - self.eraser = False - elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = False - elif state == PenState.PEN_IS_ERASING: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = True - - self.current_state = state + def backup(self): + self._old_values = copy.copy(self) def __assert_axis(self, evdev, axis, value): if ( @@ -384,6 +342,51 @@ class PenDigitizer(base.UHIDTestDevice): continue self.fields = [f.usage_name for f in r] + def move_to(self, pen, state): + # fill in the previous values + if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: + pen.restore() + + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + pen.backup() + pen.x = 0 + pen.y = 0 + pen.tipswitch = False + pen.tippressure = 0 + pen.azimuth = 0 + pen.inrange = False + pen.width = 0 + pen.height = 0 + pen.invert = False + pen.eraser = False + pen.x_tilt = 0 + pen.y_tilt = 0 + pen.twist = 0 + elif state == PenState.PEN_IS_IN_RANGE: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + elif state == PenState.PEN_IS_IN_CONTACT: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + pen.tipswitch = False + pen.inrange = True + pen.invert = True + pen.eraser = False + elif state == PenState.PEN_IS_ERASING: + pen.tipswitch = False + pen.inrange = True + pen.invert = True + pen.eraser = True + + pen.current_state = state + def event(self, pen): rs = [] r = self.create_report(application=self.cur_application, data=pen) @@ -462,7 +465,7 @@ class BaseTest: cur_state = PenState.PEN_IS_OUT_OF_RANGE p = Pen(50, 60) - p.move_to(PenState.PEN_IS_OUT_OF_RANGE) + uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE) events = self.post(uhdev, p) self.validate_transitions(cur_state, p, evdev, events) @@ -475,7 +478,7 @@ class BaseTest: events = self.post(uhdev, p) self.validate_transitions(cur_state, p, evdev, events) assert len(events) >= 3 # X, Y, SYN - p.move_to(state) + uhdev.move_to(p, state) if scribble and state != PenState.PEN_IS_OUT_OF_RANGE: p.x += 1 p.y -= 1 -- cgit v1.2.3 From d8d7aa2266a7957e24cf05392b2b899be1031ed4 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:58 +0100 Subject: selftests/hid: tablets: do not set invert when the eraser is used Turns out that the chart from Microsoft is not exactly what I got here: when the rubber is used, and is touching the surface, invert can (should) be set to 0... [0] https://learn.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-7-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 27260dc02cc4..cb3955bf0ec5 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -382,7 +382,7 @@ class PenDigitizer(base.UHIDTestDevice): elif state == PenState.PEN_IS_ERASING: pen.tipswitch = False pen.inrange = True - pen.invert = True + pen.invert = False pen.eraser = True pen.current_state = state -- cgit v1.2.3 From e08e493ff9619daab9c11e61a80c604895a4d671 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:45:59 +0100 Subject: selftests/hid: tablets: set initial data for tilt/twist Avoids getting a null event when these usages are set Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-8-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index cb3955bf0ec5..0ddf82695ed9 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -276,9 +276,9 @@ class Pen(object): self.barrelswitch = False self.invert = False self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 + self.xtilt = 1 + self.ytilt = 1 + self.twist = 1 self._old_values = None self.current_state = None @@ -292,8 +292,8 @@ class Pen(object): "width", "height", "twist", - "x_tilt", - "y_tilt", + "xtilt", + "ytilt", ]: setattr(self, i, getattr(self._old_values, i)) @@ -361,8 +361,8 @@ class PenDigitizer(base.UHIDTestDevice): pen.height = 0 pen.invert = False pen.eraser = False - pen.x_tilt = 0 - pen.y_tilt = 0 + pen.xtilt = 0 + pen.ytilt = 0 pen.twist = 0 elif state == PenState.PEN_IS_IN_RANGE: pen.tipswitch = False -- cgit v1.2.3 From 83912f83fabcb5086613e6382756750390ed48aa Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:00 +0100 Subject: selftests/hid: tablets: define the elements of PenState This introduces a little bit more readability by not using the raw values but a dedicated Enum Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-9-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 36 ++++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 0ddf82695ed9..cec65294c9ec 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -13,40 +13,52 @@ from hidtools.util import BusType import libevdev import logging import pytest -from typing import Dict, Tuple +from typing import Dict, Optional, Tuple logger = logging.getLogger("hidtools.test.tablet") +class BtnTouch(Enum): + """Represents whether the BTN_TOUCH event is set to True or False""" + + DOWN = True + UP = False + + +class ToolType(Enum): + PEN = libevdev.EV_KEY.BTN_TOOL_PEN + RUBBER = libevdev.EV_KEY.BTN_TOOL_RUBBER + + class PenState(Enum): """Pen states according to Microsoft reference: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - PEN_IS_OUT_OF_RANGE = (False, None) - PEN_IS_IN_RANGE = (False, libevdev.EV_KEY.BTN_TOOL_PEN) - PEN_IS_IN_CONTACT = (True, libevdev.EV_KEY.BTN_TOOL_PEN) - PEN_IS_IN_RANGE_WITH_ERASING_INTENT = (False, libevdev.EV_KEY.BTN_TOOL_RUBBER) - PEN_IS_ERASING = (True, libevdev.EV_KEY.BTN_TOOL_RUBBER) + PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None + PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN + PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN + PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER + PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER - def __init__(self, touch, tool): + def __init__(self, touch: BtnTouch, tool: Optional[ToolType]): self.touch = touch self.tool = tool @classmethod def from_evdev(cls, evdev) -> "PenState": - touch = bool(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) + touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) tool = None if ( evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] ): - tool = libevdev.EV_KEY.BTN_TOOL_RUBBER + tool = ToolType(libevdev.EV_KEY.BTN_TOOL_RUBBER) elif ( evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] ): - tool = libevdev.EV_KEY.BTN_TOOL_PEN + tool = ToolType(libevdev.EV_KEY.BTN_TOOL_PEN) elif ( evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] or evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] @@ -68,7 +80,7 @@ class PenState(Enum): if touch_found: raise ValueError(f"duplicated BTN_TOUCH in {events}") touch_found = True - touch = bool(ev.value) + touch = BtnTouch(ev.value) elif ev in ( libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN), libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_RUBBER), @@ -77,7 +89,7 @@ class PenState(Enum): raise ValueError(f"duplicated BTN_TOOL_* in {events}") tool_found = True if ev.value: - tool = ev.code + tool = ToolType(ev.code) else: tool = None -- cgit v1.2.3 From 74452d6329be73dd2f5562005e5eef2c7bda7c5b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:01 +0100 Subject: selftests/hid: tablets: add variants of states with buttons Turns out that there are transitions that are unlikely to happen: for example, having both the tip switch and a button being changed at the same time (in the same report) would require either a very talented and precise user or a very bad hardware with a very low sampling rate. So instead of manually building the button test by hand and forgetting about some cases, let's reuse the state machine and transitions we have. This patch only adds the states and the valid transitions. The actual tests will be replaced later. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-10-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 173 +++++++++++++++++++++-- 1 file changed, 160 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index cec65294c9ec..a77534f23c75 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -30,25 +30,72 @@ class ToolType(Enum): RUBBER = libevdev.EV_KEY.BTN_TOOL_RUBBER +class BtnPressed(Enum): + """Represents whether a button is pressed on the stylus""" + + PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS + SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2 + + class PenState(Enum): """Pen states according to Microsoft reference: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states - """ - PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None - PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN - PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN - PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER - PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER + We extend it with the various buttons when we need to check them. + """ - def __init__(self, touch: BtnTouch, tool: Optional[ToolType]): + PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None + PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None + PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED + PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = ( + BtnTouch.UP, + ToolType.PEN, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None + PEN_IS_IN_CONTACT_WITH_BUTTON = ( + BtnTouch.DOWN, + ToolType.PEN, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = ( + BtnTouch.DOWN, + ToolType.PEN, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None + PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = ( + BtnTouch.UP, + ToolType.RUBBER, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = ( + BtnTouch.UP, + ToolType.RUBBER, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None + PEN_IS_ERASING_WITH_BUTTON = ( + BtnTouch.DOWN, + ToolType.RUBBER, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_ERASING_WITH_SECOND_BUTTON = ( + BtnTouch.DOWN, + ToolType.RUBBER, + BtnPressed.SECONDARY_PRESSED, + ) + + def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]): self.touch = touch self.tool = tool + self.button = button @classmethod def from_evdev(cls, evdev) -> "PenState": touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) tool = None + button = None if ( evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] @@ -65,7 +112,17 @@ class PenState(Enum): ): raise ValueError("2 tools are not allowed") - return cls((touch, tool)) + # we take only the highest button in account + for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]: + if bool(evdev.value[b]): + button = b + + # the kernel tends to insert an EV_SYN once removing the tool, so + # the button will be released after + if tool is None: + button = None + + return cls((touch, tool, button)) def apply(self, events) -> "PenState": if libevdev.EV_SYN.SYN_REPORT in events: @@ -74,6 +131,8 @@ class PenState(Enum): touch_found = False tool = self.tool tool_found = False + button = self.button + button_found = False for ev in events: if ev == libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH): @@ -88,12 +147,22 @@ class PenState(Enum): if tool_found: raise ValueError(f"duplicated BTN_TOOL_* in {events}") tool_found = True - if ev.value: - tool = ToolType(ev.code) - else: - tool = None + tool = ToolType(ev.code) if ev.value else None + elif ev in ( + libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS), + libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2), + ): + if button_found: + raise ValueError(f"duplicated BTN_STYLUS* in {events}") + button_found = True + button = ev.code if ev.value else None - new_state = PenState((touch, tool)) + # the kernel tends to insert an EV_SYN once removing the tool, so + # the button will be released after + if tool is None: + button = None + + new_state = PenState((touch, tool, button)) assert ( new_state in self.valid_transitions() ), f"moving from {self} to {new_state} is forbidden" @@ -109,14 +178,20 @@ class PenState(Enum): return ( PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_ERASING, ) if self == PenState.PEN_IS_IN_RANGE: return ( PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_CONTACT, ) @@ -124,6 +199,8 @@ class PenState(Enum): if self == PenState.PEN_IS_IN_CONTACT: return ( PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_OUT_OF_RANGE, ) @@ -142,6 +219,38 @@ class PenState(Enum): PenState.PEN_IS_OUT_OF_RANGE, ) + if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + ) + + if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ) + + if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + ) + + if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ) + return tuple() @staticmethod @@ -376,26 +485,64 @@ class PenDigitizer(base.UHIDTestDevice): pen.xtilt = 0 pen.ytilt = 0 pen.twist = 0 + pen.barrelswitch = False + pen.secondarybarrelswitch = False elif state == PenState.PEN_IS_IN_RANGE: pen.tipswitch = False pen.inrange = True pen.invert = False pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False elif state == PenState.PEN_IS_IN_CONTACT: pen.tipswitch = True pen.inrange = True pen.invert = False pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = True + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = True + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = True + elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = True elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: pen.tipswitch = False pen.inrange = True pen.invert = True pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False elif state == PenState.PEN_IS_ERASING: pen.tipswitch = False pen.inrange = True pen.invert = False pen.eraser = True + pen.barrelswitch = False + pen.secondarybarrelswitch = False pen.current_state = state -- cgit v1.2.3 From 1f01537ef17efec97a8936c62e4ffa704cab7c06 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:02 +0100 Subject: selftests/hid: tablets: convert the primary button tests We get more descriptive in what we are doing, and also get more information of what is actually being tested. Instead of having a non exhaustive button changes that are semi-randomly done, we can describe all the states we want to test. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-11-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 160 +++++++++-------------- 1 file changed, 65 insertions(+), 95 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index a77534f23c75..20a7a7fdcd9d 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -317,6 +317,55 @@ class PenState(Enum): ), } + @staticmethod + def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]: + """We revisit the Windows Pen Implementation state machine: + we now have a primary button. + """ + return { + "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,), + "hover-button -> out-of-range": ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ), + "in-range -> button-press": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + ), + "in-range -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> touch -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + ), + "in-range -> touch -> button-press -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> button-release -> release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE, + ), + } + @staticmethod def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is not adhering to the Windows Pen Implementation state machine @@ -671,6 +720,22 @@ class BaseTest: reasons.""" self._test_states(state_list, scribble) + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Barrel Switch" not in uhdev.fields, + "Device not compatible, missing Barrel Switch usage", + ) + @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) + @pytest.mark.parametrize( + "state_list", + [ + pytest.param(v, id=k) + for k, v in PenState.legal_transitions_with_primary_button().items() + ], + ) + def test_valid_primary_button_pen_states(self, state_list, scribble): + """Rework the transition state machine by adding the primary button.""" + self._test_states(state_list, scribble) + @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, "Device not compatible, missing Invert usage", @@ -728,101 +793,6 @@ class BaseTest: state machine.""" self._test_states(state_list, scribble) - @pytest.mark.skip_if_uhdev( - lambda uhdev: "Barrel Switch" not in uhdev.fields, - "Device not compatible, missing Barrel Switch usage", - ) - def test_primary_button(self): - """Primary button (stylus) pressed, reports as pressed even while hovering. - Actual reporting from the device: hid=TIPSWITCH,BARRELSWITCH,INRANGE (code=TOUCH,STYLUS,PEN): - { 0, 0, 1 } <- hover - { 0, 1, 1 } <- primary button pressed - { 0, 1, 1 } <- liftoff - { 0, 0, 0 } <- leaves - """ - - uhdev = self.uhdev - evdev = uhdev.get_evdev() - - p = Pen(50, 60) - p.inrange = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1) in events - assert evdev.value[libevdev.EV_ABS.ABS_X] == 50 - assert evdev.value[libevdev.EV_ABS.ABS_Y] == 60 - assert not evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.barrelswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1) in events - - p.x += 1 - p.y -= 1 - events = self.post(uhdev, p) - assert len(events) == 3 # X, Y, SYN - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 51) in events - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 59) in events - - p.barrelswitch = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0) in events - - p.inrange = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0) in events - - @pytest.mark.skip_if_uhdev( - lambda uhdev: "Barrel Switch" not in uhdev.fields, - "Device not compatible, missing Barrel Switch usage", - ) - def test_contact_primary_button(self): - """Primary button (stylus) pressed, reports as pressed even while hovering. - Actual reporting from the device: hid=TIPSWITCH,BARRELSWITCH,INRANGE (code=TOUCH,STYLUS,PEN): - { 0, 0, 1 } <- hover - { 0, 1, 1 } <- primary button pressed - { 1, 1, 1 } <- touch-down - { 1, 1, 1 } <- still touch, scribble on the screen - { 0, 1, 1 } <- liftoff - { 0, 0, 0 } <- leaves - """ - - uhdev = self.uhdev - evdev = uhdev.get_evdev() - - p = Pen(50, 60) - p.inrange = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1) in events - assert evdev.value[libevdev.EV_ABS.ABS_X] == 50 - assert evdev.value[libevdev.EV_ABS.ABS_Y] == 60 - assert not evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.barrelswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1) in events - - p.tipswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events - assert evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.x += 1 - p.y -= 1 - events = self.post(uhdev, p) - assert len(events) == 3 # X, Y, SYN - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 51) in events - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 59) in events - - p.tipswitch = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events - - p.barrelswitch = False - p.inrange = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0) in events - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0) in events - class GXTP_pen(PenDigitizer): def event(self, pen): -- cgit v1.2.3 From 76df1f72fb258cc7da6eff5dd8db95f4e2856517 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:03 +0100 Subject: selftests/hid: tablets: add a secondary barrel switch test Some tablets report 2 barrel switches. We better test those too. Use the same transistions description from the primary button tests. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-12-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 67 ++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 20a7a7fdcd9d..a82db66264c5 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -366,6 +366,56 @@ class PenState(Enum): ), } + @staticmethod + def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]: + """We revisit the Windows Pen Implementation state machine: + we now have a secondary button. + Note: we don't looks for 2 buttons interactions. + """ + return { + "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,), + "hover-button -> out-of-range": ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ), + "in-range -> button-press": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + ), + "in-range -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> touch -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + ), + "in-range -> touch -> button-press -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> button-release -> release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE, + ), + } + @staticmethod def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is not adhering to the Windows Pen Implementation state machine @@ -444,6 +494,7 @@ class Pen(object): self.width = 10 self.height = 10 self.barrelswitch = False + self.secondarybarrelswitch = False self.invert = False self.eraser = False self.xtilt = 1 @@ -736,6 +787,22 @@ class BaseTest: """Rework the transition state machine by adding the primary button.""" self._test_states(state_list, scribble) + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields, + "Device not compatible, missing Secondary Barrel Switch usage", + ) + @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) + @pytest.mark.parametrize( + "state_list", + [ + pytest.param(v, id=k) + for k, v in PenState.legal_transitions_with_secondary_button().items() + ], + ) + def test_valid_secondary_button_pen_states(self, state_list, scribble): + """Rework the transition state machine by adding the secondary button.""" + self._test_states(state_list, scribble) + @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, "Device not compatible, missing Invert usage", -- cgit v1.2.3 From ab9b82909e9baa5b559d6457487525cfd4df2738 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:04 +0100 Subject: selftests/hid: tablets: be stricter for some transitions To accommodate for legacy devices, we rely on the last state of a transition to be valid: for example when we test PEN_IS_OUT_OF_RANGE to PEN_IS_IN_CONTACT, any "normal" device that reports an InRange bit would insert a PEN_IS_IN_RANGE state between the 2. This is of course valid, but this solution prevents to detect false releases emitted by some firmware: when pressing an "eraser mode" button, they might send an extra PEN_IS_OUT_OF_RANGE that we may want to filter. So define 2 sets of transitions: one that is the ideal behavior, and one that is OK, it won't break user space, but we have serious doubts if we are doing the right thing. And depending on the test, either ask only for valid transitions, or tolerate weird ones. Reviewed-by: Peter Hutterer Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-13-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 132 +++++++++++++++++++---- 1 file changed, 113 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index a82db66264c5..9374bd7524ef 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -13,7 +13,7 @@ from hidtools.util import BusType import libevdev import logging import pytest -from typing import Dict, Optional, Tuple +from typing import Dict, List, Optional, Tuple logger = logging.getLogger("hidtools.test.tablet") @@ -124,7 +124,7 @@ class PenState(Enum): return cls((touch, tool, button)) - def apply(self, events) -> "PenState": + def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState": if libevdev.EV_SYN.SYN_REPORT in events: raise ValueError("EV_SYN is in the event sequence") touch = self.touch @@ -163,13 +163,97 @@ class PenState(Enum): button = None new_state = PenState((touch, tool, button)) - assert ( - new_state in self.valid_transitions() - ), f"moving from {self} to {new_state} is forbidden" + if strict: + assert ( + new_state in self.valid_transitions() + ), f"moving from {self} to {new_state} is forbidden" + else: + assert ( + new_state in self.historically_tolerated_transitions() + ), f"moving from {self} to {new_state} is forbidden" return new_state def valid_transitions(self) -> Tuple["PenState", ...]: + """Following the state machine in the URL above. + + Note that those transitions are from the evdev point of view, not HID""" + if self == PenState.PEN_IS_OUT_OF_RANGE: + return ( + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_ERASING, + ) + + if self == PenState.PEN_IS_IN_RANGE: + return ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT, + ) + + if self == PenState.PEN_IS_IN_CONTACT: + return ( + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ) + + if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + return ( + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_ERASING, + ) + + if self == PenState.PEN_IS_ERASING: + return ( + PenState.PEN_IS_ERASING, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + ) + + if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + ) + + if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + ) + + if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + ) + + if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + ) + + return tuple() + + def historically_tolerated_transitions(self) -> Tuple["PenState", ...]: """Following the state machine in the URL above, with a couple of addition for skipping the in-range state, due to historical reasons. @@ -693,10 +777,14 @@ class BaseTest: self.debug_reports(r, uhdev, events) return events - def validate_transitions(self, from_state, pen, evdev, events): + def validate_transitions( + self, from_state, pen, evdev, events, allow_intermediate_states + ): # check that the final state is correct pen.assert_expected_input_events(evdev) + state = from_state + # check that the transitions are valid sync_events = [] while libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) in events: @@ -706,12 +794,12 @@ class BaseTest: events = events[idx + 1 :] # now check for a valid transition - from_state = from_state.apply(sync_events) + state = state.apply(sync_events, not allow_intermediate_states) if events: - from_state = from_state.apply(sync_events) + state = state.apply(sync_events, not allow_intermediate_states) - def _test_states(self, state_list, scribble): + def _test_states(self, state_list, scribble, allow_intermediate_states): """Internal method to test against a list of transition between states. state_list is a list of PenState objects @@ -726,7 +814,9 @@ class BaseTest: p = Pen(50, 60) uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE) events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) cur_state = p.current_state @@ -735,14 +825,18 @@ class BaseTest: p.x += 1 p.y -= 1 events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) assert len(events) >= 3 # X, Y, SYN uhdev.move_to(p, state) if scribble and state != PenState.PEN_IS_OUT_OF_RANGE: p.x += 1 p.y -= 1 events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) cur_state = p.current_state @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @@ -755,7 +849,7 @@ class BaseTest: we don't have Invert nor Erase bits, so just move in/out-of-range or proximity. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( @@ -769,7 +863,7 @@ class BaseTest: """This is not adhering to the Windows Pen Implementation state machine but we should expect the kernel to behave properly, mostly for historical reasons.""" - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=True) @pytest.mark.skip_if_uhdev( lambda uhdev: "Barrel Switch" not in uhdev.fields, @@ -785,7 +879,7 @@ class BaseTest: ) def test_valid_primary_button_pen_states(self, state_list, scribble): """Rework the transition state machine by adding the primary button.""" - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.skip_if_uhdev( lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields, @@ -801,7 +895,7 @@ class BaseTest: ) def test_valid_secondary_button_pen_states(self, state_list, scribble): """Rework the transition state machine by adding the secondary button.""" - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -821,7 +915,7 @@ class BaseTest: to erase. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -841,7 +935,7 @@ class BaseTest: to erase. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=True) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -858,7 +952,7 @@ class BaseTest: For example, a pen that has the eraser button might wobble between touching and erasing if the tablet doesn't enforce the Windows state machine.""" - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=True) class GXTP_pen(PenDigitizer): -- cgit v1.2.3 From ed5bc56cedca19ac429533aa527bce5287ab0a5a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:05 +0100 Subject: selftests/hid: fix mypy complains No code change, only typing information added/ignored Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-14-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/base.py | 4 ++-- tools/testing/selftests/hid/tests/test_tablet.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 5d9c26dfc460..51433063b227 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -14,7 +14,7 @@ import logging from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile from pathlib import Path -from typing import Final +from typing import Final, List, Tuple logger = logging.getLogger("hidtools.test.base") @@ -155,7 +155,7 @@ class BaseTestCase: # if any module is not available (not compiled), the test will skip. # Each element is a tuple '(kernel driver name, kernel module)', # for example ("playstation", "hid-playstation") - kernel_modules = [] + kernel_modules: List[Tuple[str, str]] = [] def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 9374bd7524ef..dc8b0fe9e7f3 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -87,9 +87,9 @@ class PenState(Enum): ) def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]): - self.touch = touch - self.tool = tool - self.button = button + self.touch = touch # type: ignore + self.tool = tool # type: ignore + self.button = button # type: ignore @classmethod def from_evdev(cls, evdev) -> "PenState": @@ -122,7 +122,7 @@ class PenState(Enum): if tool is None: button = None - return cls((touch, tool, button)) + return cls((touch, tool, button)) # type: ignore def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState": if libevdev.EV_SYN.SYN_REPORT in events: @@ -162,7 +162,7 @@ class PenState(Enum): if tool is None: button = None - new_state = PenState((touch, tool, button)) + new_state = PenState((touch, tool, button)) # type: ignore if strict: assert ( new_state in self.valid_transitions() -- cgit v1.2.3 From f556aa957df8cb3e98af0f54bf1fa65f59ae47a3 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 6 Dec 2023 11:46:06 +0100 Subject: selftests/hid: fix ruff linter complains rename ambiguous variables l, r, and m, and ignore the return values of uhdev.get_evdev() and uhdev.get_slot() Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231206-wip-selftests-v2-15-c0350c2f5986@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_mouse.py | 14 +++++++------- tools/testing/selftests/hid/tests/test_wacom_generic.py | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index fd2ba62e783a..66daf7e5975c 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -52,13 +52,13 @@ class BaseMouse(base.UHIDTestDevice): :param reportID: the numeric report ID for this report, if needed """ if buttons is not None: - l, r, m = buttons - if l is not None: - self.left = l - if r is not None: - self.right = r - if m is not None: - self.middle = m + left, right, middle = buttons + if left is not None: + self.left = left + if right is not None: + self.right = right + if middle is not None: + self.middle = middle left = self.left right = self.right middle = self.middle diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index f92fe8e02c1b..49186a27467e 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -909,7 +909,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest Ensure that the confidence bit being set to false should not result in a touch event. """ uhdev = self.uhdev - evdev = uhdev.get_evdev() + _evdev = uhdev.get_evdev() t0 = test_multitouch.Touch(1, 50, 100) t0.confidence = False @@ -917,6 +917,6 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) - slot = self.get_slot(uhdev, t0, 0) + _slot = self.get_slot(uhdev, t0, 0) - assert not events \ No newline at end of file + assert not events -- cgit v1.2.3 From da2c1b861065b452590d75a1e2f5ee9b396fef92 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 7 Dec 2023 13:22:39 +0100 Subject: selftests/hid: fix failing tablet button tests An overlook from commit 74452d6329be ("selftests/hid: tablets: add variants of states with buttons"), where I don't use the Enum... Fixes: 74452d6329be ("selftests/hid: tablets: add variants of states with buttons") Acked-by: Jiri Kosina Link: https://lore.kernel.org/r/20231207-b4-wip-selftests-v1-1-c4e13fe04a70@kernel.org Signed-off-by: Benjamin Tissoires --- tools/testing/selftests/hid/tests/test_tablet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index dc8b0fe9e7f3..903f19f7cbe9 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -115,7 +115,7 @@ class PenState(Enum): # we take only the highest button in account for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]: if bool(evdev.value[b]): - button = b + button = BtnPressed(b) # the kernel tends to insert an EV_SYN once removing the tool, so # the button will be released after @@ -155,7 +155,7 @@ class PenState(Enum): if button_found: raise ValueError(f"duplicated BTN_STYLUS* in {events}") button_found = True - button = ev.code if ev.value else None + button = BtnPressed(ev.code) if ev.value else None # the kernel tends to insert an EV_SYN once removing the tool, so # the button will be released after -- cgit v1.2.3 From 6dac1195181cb561a1ac32b92f58c92e87a91c70 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 5 Dec 2023 11:36:23 +0100 Subject: KVM: selftests: Make Hyper-V tests explicitly require KVM Hyper-V support In preparation for conditional Hyper-V emulation enablement in KVM, make Hyper-V specific tests skip gracefully instead of failing when KVM support for emulating Hyper-V is not there. Reviewed-by: Maxim Levitsky Tested-by: Jeremi Piotrowski Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20231205103630.1391318-10-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 2 ++ tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c | 5 +++-- tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c | 2 ++ tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 ++ tools/testing/selftests/kvm/x86_64/hyperv_ipi.c | 2 ++ tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 1 + tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c | 2 ++ 7 files changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index f25749eaa6a8..f5e1e98f04f9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -211,6 +211,8 @@ int main(void) vm_vaddr_t tsc_page_gva; int stage; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); vcpu_set_hv_cpuid(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 7bde0c4dfdbd..4c7257ecd2a6 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -240,11 +240,12 @@ int main(int argc, char *argv[]) struct ucall uc; int stage; - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); hcall_page = vm_vaddr_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c index e036db1f32b9..949e08e98f31 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c @@ -43,6 +43,8 @@ int main(void) uint64_t *outval; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); + /* Verify if extended hypercalls are supported */ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(), HV_ENABLE_EXTENDED_HYPERCALLS)) { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 9f28aa276c4e..387c605a3077 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -690,6 +690,8 @@ static void guest_test_hcalls_access(void) int main(void) { + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID)); + pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index 6feb5ddb031d..65e5f4c05068 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -248,6 +248,8 @@ int main(int argc, char *argv[]) int stage = 1, r; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Hypercall input/output */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 6c1278562090..c9b18707edc0 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -158,6 +158,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index 4758b6ef5618..c4443f71f8dd 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -590,6 +590,8 @@ int main(int argc, char *argv[]) struct ucall uc; int stage = 1, r, i; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Test data page */ -- cgit v1.2.3 From 225b7c1117b2f6dadbdb1d40538d37b9685e8b18 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 5 Dec 2023 11:36:24 +0100 Subject: KVM: selftests: Fix vmxon_pa == vmcs12_pa == -1ull nVMX testcase for !eVMCS The "vmxon_pa == vmcs12_pa == -1ull" test happens to work by accident: as Enlightened VMCS is always supported, set_default_vmx_state() adds 'KVM_STATE_NESTED_EVMCS' to 'flags' and the following branch of vmx_set_nested_state() is executed: if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && (!guest_can_use(vcpu, X86_FEATURE_VMX) || !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; as 'enlightened_vmcs_enabled' is false. In fact, "vmxon_pa == vmcs12_pa == -1ull" is a valid state when not tainted by wrong flags so the test should aim for this branch: if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) return 0; Test all this properly: - Without KVM_STATE_NESTED_EVMCS in the flags, the expected return value is '0'. - With KVM_STATE_NESTED_EVMCS flag (when supported) set, the expected return value is '-EINVAL' prior to enabling eVMCS and '0' after. Reviewed-by: Maxim Levitsky Tested-by: Jeremi Piotrowski Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20231205103630.1391318-11-vkuznets@redhat.com Signed-off-by: Sean Christopherson --- .../selftests/kvm/x86_64/vmx_set_nested_state_test.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 41ea7028a1f8..67a62a5a8895 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -125,21 +125,25 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without - * setting the nested state but flags other than eVMCS must be clear. - * The eVMCS flag can be set if the enlightened VMCS capability has - * been enabled. + * setting the nested state. When the eVMCS flag is not set, the + * expected return value is '0'. */ set_default_vmx_state(state, state_sz); + state->flags = 0; state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; - test_nested_state_expect_einval(vcpu, state); + test_nested_state(vcpu, state); - state->flags &= KVM_STATE_NESTED_EVMCS; + /* + * When eVMCS is supported, the eVMCS flag can only be set if the + * enlightened VMCS capability has been enabled. + */ if (have_evmcs) { + state->flags = KVM_STATE_NESTED_EVMCS; test_nested_state_expect_einval(vcpu, state); vcpu_enable_evmcs(vcpu); + test_nested_state(vcpu, state); } - test_nested_state(vcpu, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ state->hdr.vmx.smm.flags = 1; -- cgit v1.2.3 From 9d03194a36345796d4f0f8d6b72eb770a45d614e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:34 -0800 Subject: perf annotate: Introduce global annotation_options The annotation options are to control the behavior of objdump and the output. It's basically used by 'perf annotate' but 'perf report' and 'perf top' can call it on TUI dynamically. But it doesn't need to have a copy of annotation options in many places. As most of the work is done in the util/annotate.c file, add a global variable and set/use it instead of having their own copies. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 43 +++++++++++++++++++++---------------------- tools/perf/util/annotate.c | 3 +++ tools/perf/util/annotate.h | 2 ++ 3 files changed, 26 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a9129b51d511..67b36a7a12e3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -45,7 +45,6 @@ struct perf_annotate { struct perf_tool tool; struct perf_session *session; - struct annotation_options opts; #ifdef HAVE_SLANG_SUPPORT bool use_tui; #endif @@ -318,9 +317,9 @@ static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(&he->ms, evsel, &ann->opts); + return symbol__tty_annotate(&he->ms, evsel, &annotate_opts); - return symbol__tty_annotate2(&he->ms, evsel, &ann->opts); + return symbol__tty_annotate2(&he->ms, evsel, &annotate_opts); } static void hists__find_annotations(struct hists *hists, @@ -376,14 +375,14 @@ find_next: return; } - ret = annotate(he, evsel, &ann->opts, NULL); + ret = annotate(he, evsel, &annotate_opts, NULL); if (!ret || !ann->skip_missing) return; /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); + key = hist_entry__tui_annotate(he, evsel, NULL, &annotate_opts); switch (key) { case -1: @@ -425,9 +424,9 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - if (!ann->opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&session->header.env, - &ann->opts.objdump_path); + &annotate_opts.objdump_path); if (ret) goto out; } @@ -561,9 +560,9 @@ int cmd_annotate(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, + OPT_BOOLEAN('l', "print-line", &annotate_opts.print_lines, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, + OPT_BOOLEAN('P', "full-paths", &annotate_opts.full_path, "Don't shorten the displayed pathnames"), OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), @@ -574,15 +573,15 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK(0, "symfs", NULL, "directory", "Look for files with symbols relative to this directory", symbol__config_symfs), - OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -601,7 +600,7 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", "'always' (default), 'never' or 'auto' only applicable to --stdio mode", stdio__config_color, "always"), - OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_CALLBACK(0, "percent-limit", &annotate, "percent", @@ -617,13 +616,13 @@ int cmd_annotate(int argc, const char **argv) set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); - annotation_options__init(&annotate.opts); + annotation_options__init(&annotate_opts); ret = hists__init(); if (ret < 0) return ret; - annotation_config__init(&annotate.opts); + annotation_config__init(&annotate_opts); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { @@ -638,13 +637,13 @@ int cmd_annotate(int argc, const char **argv) } if (disassembler_style) { - annotate.opts.disassembler_style = strdup(disassembler_style); - if (!annotate.opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - annotate.opts.objdump_path = strdup(objdump_path); - if (!annotate.opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -653,7 +652,7 @@ int cmd_annotate(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&annotate.opts) < 0) + if (annotate_check_args(&annotate_opts) < 0) return -EINVAL; #ifdef HAVE_GTK2_SUPPORT @@ -734,7 +733,7 @@ out_delete: #ifndef NDEBUG perf_session__delete(annotate.session); #endif - annotation_options__exit(&annotate.opts); + annotation_options__exit(&annotate_opts); return ret; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9a828dc601c7..77b78001b94d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -57,6 +57,9 @@ #include +/* global annotation options */ +struct annotation_options annotate_opts; + static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b64a2be287b3..8c1a070725fa 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -105,6 +105,8 @@ struct annotation_options { unsigned int percent_type; }; +extern struct annotation_options annotate_opts; + enum { ANNOTATION__OFFSET_JUMP_TARGETS = 1, ANNOTATION__OFFSET_CALL, -- cgit v1.2.3 From 14953f038d6b30e3dc9d1aa4d4584ac505e5a8ec Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:35 -0800 Subject: perf report: Convert to the global annotation_options Use the global option and drop the local copy. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 121a2781323c..90f98953587c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -98,7 +98,6 @@ struct report { bool skip_empty; int max_stack; struct perf_read_values show_threads_values; - struct annotation_options annotation_opts; const char *pretty_printing_style; const char *cpu_list; const char *symbol_filter_str; @@ -542,7 +541,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, &rep->session->header.env, - &rep->annotation_opts); + &annotate_opts); if (ret != 0) return ret; } @@ -670,7 +669,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true, &rep->annotation_opts); + &session->header.env, true, &annotate_opts); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -745,7 +744,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg) if (rep->symbol_ipc && sym && !sym->annotate2) { struct evsel *evsel = hists_to_evsel(he->hists); - symbol__annotate2(&he->ms, evsel, &rep->annotation_opts, NULL); + symbol__annotate2(&he->ms, evsel, &annotate_opts, NULL); } return 0; @@ -1341,15 +1340,15 @@ int cmd_report(int argc, const char **argv) "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), - OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), @@ -1401,7 +1400,7 @@ int cmd_report(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), - OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", + OPT_CALLBACK(0, "percent-type", &annotate_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), @@ -1433,7 +1432,7 @@ int cmd_report(int argc, const char **argv) */ symbol_conf.keep_exited_threads = true; - annotation_options__init(&report.annotation_opts); + annotation_options__init(&annotate_opts); ret = perf_config(report__config, &report); if (ret) @@ -1452,13 +1451,13 @@ int cmd_report(int argc, const char **argv) } if (disassembler_style) { - report.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!report.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - report.annotation_opts.objdump_path = strdup(objdump_path); - if (!report.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1467,7 +1466,7 @@ int cmd_report(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&report.annotation_opts) < 0) { + if (annotate_check_args(&annotate_opts) < 0) { ret = -EINVAL; goto exit; } @@ -1699,7 +1698,7 @@ repeat: */ symbol_conf.priv_size += sizeof(u32); } - annotation_config__init(&report.annotation_opts); + annotation_config__init(&annotate_opts); } if (symbol__init(&session->header.env) < 0) @@ -1753,7 +1752,7 @@ error: zstd_fini(&(session->zstd_data)); perf_session__delete(session); exit: - annotation_options__exit(&report.annotation_opts); + annotation_options__exit(&annotate_opts); free(sort_order_help); free(field_order_help); return ret; -- cgit v1.2.3 From c9a21a872c69032cb9a94ebc171649c0c28141d7 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:36 -0800 Subject: perf top: Convert to the global annotation_options Use the global option and drop the local copy. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 44 ++++++++++++++++++++++---------------------- tools/perf/util/top.h | 1 - 2 files changed, 22 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1e42bd1c7d5a..a6495aa5898e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -147,7 +147,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); + err = symbol__annotate(&he->ms, evsel, &annotate_opts, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -261,9 +261,9 @@ static void perf_top__show_details(struct perf_top *top) goto out_unlock; printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); + printf(" Events Pcnt (>=%d%%)\n", annotate_opts.min_pcnt); - more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); + more = symbol__annotate_printf(&he->ms, top->sym_evsel, &annotate_opts); if (top->evlist->enabled) { if (top->zero) @@ -450,7 +450,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", annotate_opts.min_pcnt); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -553,7 +553,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&top->annotation_opts.min_pcnt, + prompt_percent(&annotate_opts.min_pcnt, "Enter details display event filter (percent)"); break; case 'K': @@ -647,7 +647,7 @@ repeat: ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, &top->session->header.env, !top->record_opts.overwrite, - &top->annotation_opts); + &annotate_opts); if (ret == K_RELOAD) { top->zero = true; goto repeat; @@ -1241,9 +1241,9 @@ static int __cmd_top(struct perf_top *top) pthread_t thread, thread_process; int ret; - if (!top->annotation_opts.objdump_path) { + if (!annotate_opts.objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env, - &top->annotation_opts.objdump_path); + &annotate_opts.objdump_path); if (ret) return ret; } @@ -1536,9 +1536,9 @@ int cmd_top(int argc, const char **argv) "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), - OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, + OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), @@ -1549,9 +1549,9 @@ int cmd_top(int argc, const char **argv) "addr2line binary to use for line numbers"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", + OPT_STRING(0, "prefix", &annotate_opts.prefix, "prefix", "Add prefix to source file path names in programs (with --prefix-strip)"), - OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", + OPT_STRING(0, "prefix-strip", &annotate_opts.prefix_strip, "N", "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", @@ -1609,10 +1609,10 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; - annotation_options__init(&top.annotation_opts); + annotation_options__init(&annotate_opts); - top.annotation_opts.min_pcnt = 5; - top.annotation_opts.context = 4; + annotate_opts.min_pcnt = 5; + annotate_opts.context = 4; top.evlist = evlist__new(); if (top.evlist == NULL) @@ -1642,13 +1642,13 @@ int cmd_top(int argc, const char **argv) usage_with_options(top_usage, options); if (disassembler_style) { - top.annotation_opts.disassembler_style = strdup(disassembler_style); - if (!top.annotation_opts.disassembler_style) + annotate_opts.disassembler_style = strdup(disassembler_style); + if (!annotate_opts.disassembler_style) return -ENOMEM; } if (objdump_path) { - top.annotation_opts.objdump_path = strdup(objdump_path); - if (!top.annotation_opts.objdump_path) + annotate_opts.objdump_path = strdup(objdump_path); + if (!annotate_opts.objdump_path) return -ENOMEM; } if (addr2line_path) { @@ -1661,7 +1661,7 @@ int cmd_top(int argc, const char **argv) if (status) goto out_delete_evlist; - if (annotate_check_args(&top.annotation_opts) < 0) + if (annotate_check_args(&annotate_opts) < 0) goto out_delete_evlist; if (!top.evlist->core.nr_entries) { @@ -1787,7 +1787,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(&top.annotation_opts); + annotation_config__init(&annotate_opts); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); @@ -1840,7 +1840,7 @@ int cmd_top(int argc, const char **argv) out_delete_evlist: evlist__delete(top.evlist); perf_session__delete(top.session); - annotation_options__exit(&top.annotation_opts); + annotation_options__exit(&annotate_opts); return status; } diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a8b0d79bd96c..4c5588dbb131 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -21,7 +21,6 @@ struct perf_top { struct perf_tool tool; struct evlist *evlist, *sb_evlist; struct record_opts record_opts; - struct annotation_options annotation_opts; struct evswitch evswitch; /* * Symbols will be added here in perf_event__process_sample and will -- cgit v1.2.3 From 41fd3cacd29f47f6b9c6474b27c5b0513786c4e9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:37 -0800 Subject: perf annotate: Use global annotation_options Now it can directly use the global options and no need to pass it as an argument. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-5-namhyung@kernel.org [ Fixup build with GTK2=1 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 7 +-- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 4 +- tools/perf/ui/browsers/annotate.c | 6 +- tools/perf/ui/gtk/annotate.c | 6 +- tools/perf/ui/gtk/gtk.h | 2 - tools/perf/util/annotate.c | 118 ++++++++++++++++++-------------------- tools/perf/util/annotate.h | 15 ++--- 8 files changed, 71 insertions(+), 89 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 67b36a7a12e3..9c1e2b2b5bc0 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -317,9 +317,9 @@ static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(&he->ms, evsel, &annotate_opts); + return symbol__tty_annotate(&he->ms, evsel); - return symbol__tty_annotate2(&he->ms, evsel, &annotate_opts); + return symbol__tty_annotate2(&he->ms, evsel); } static void hists__find_annotations(struct hists *hists, @@ -365,7 +365,6 @@ find_next: int ret; int (*annotate)(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); annotate = dlsym(perf_gtk_handle, @@ -375,7 +374,7 @@ find_next: return; } - ret = annotate(he, evsel, &annotate_opts, NULL); + ret = annotate(he, evsel, NULL); if (!ret || !ann->skip_missing) return; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 90f98953587c..2b86651615cd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -744,7 +744,7 @@ static int hists__resort_cb(struct hist_entry *he, void *arg) if (rep->symbol_ipc && sym && !sym->annotate2) { struct evsel *evsel = hists_to_evsel(he->hists); - symbol__annotate2(&he->ms, evsel, &annotate_opts, NULL); + symbol__annotate2(&he->ms, evsel, NULL); } return 0; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a6495aa5898e..46a61634701b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -147,7 +147,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, &annotate_opts, NULL); + err = symbol__annotate(&he->ms, evsel, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -263,7 +263,7 @@ static void perf_top__show_details(struct perf_top *top) printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", annotate_opts.min_pcnt); - more = symbol__annotate_printf(&he->ms, top->sym_evsel, &annotate_opts); + more = symbol__annotate_printf(&he->ms, top->sym_evsel); if (top->evlist->enabled) { if (top->zero) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 163f916fff68..ed0e692afdbe 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -114,7 +114,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) ops.width += 1; - annotation_line__write(al, notes, &ops, ab->opts); + annotation_line__write(al, notes, &ops); if (ops.current_entry) ab->selection = al; @@ -884,7 +884,7 @@ show_sup_ins: continue; } case 'P': - map_symbol__annotation_dump(ms, evsel, browser->opts); + map_symbol__annotation_dump(ms, evsel); continue; case 't': if (symbol_conf.show_total_period) { @@ -979,7 +979,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, return -1; if (not_annotated) { - err = symbol__annotate2(ms, evsel, opts, &browser.arch); + err = symbol__annotate2(ms, evsel, &browser.arch); if (err) { char msg[BUFSIZ]; dso->annotate_warned = true; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 2effac77ca8c..394861245fd3 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -162,7 +162,6 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, } static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { struct dso *dso = map__dso(ms->map); @@ -176,7 +175,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, if (dso->annotate_warned) return -1; - err = symbol__annotate(ms, evsel, options, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; dso->annotate_warned = true; @@ -244,10 +243,9 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt) { - return symbol__gtk_annotate(&he->ms, evsel, options, hbt); + return symbol__gtk_annotate(&he->ms, evsel, hbt); } void perf_gtk__show_annotations(void) diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 1e84dceb5267..a2b497f03fd6 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -56,13 +56,11 @@ struct evsel; struct evlist; struct hist_entry; struct hist_browser_timer; -struct annotation_options; int evlist__gtk_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt); int hist_entry__gtk_annotate(struct hist_entry *he, struct evsel *evsel, - struct annotation_options *options, struct hist_browser_timer *hbt); void perf_gtk__show_annotations(void); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 77b78001b94d..daff9af552f4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1896,7 +1896,6 @@ static int symbol__disassemble_bpf(struct symbol *sym, struct annotate_args *args) { struct annotation *notes = symbol__annotation(sym); - struct annotation_options *opts = args->options; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; @@ -2006,7 +2005,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, prev_buf_size = buf_size; fflush(s); - if (!opts->hide_src_code && srcline) { + if (!annotate_opts.hide_src_code && srcline) { args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; @@ -2129,7 +2128,7 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { - struct annotation_options *opts = args->options; + struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; @@ -2380,13 +2379,13 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) } int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { .evsel = evsel, - .options = options, + .options = &annotate_opts, }; struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); @@ -2414,7 +2413,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, } args.ms = *ms; - if (notes->options && notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2422,12 +2421,12 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al, - struct annotation_options *opts) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; + unsigned int percent_type = annotate_opts.percent_type; int i, ret; while (*p != NULL) { @@ -2438,7 +2437,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, if (ret == 0) { for (i = 0; i < al->data_nr; i++) { iter->data[i].percent_sum += annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } return; } @@ -2451,7 +2450,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, for (i = 0; i < al->data_nr; i++) { al->data[i].percent_sum = annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } rb_link_node(&al->rb_node, parent, p); @@ -2573,8 +2572,7 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -2585,6 +2583,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); struct annotation_line *pos, *queue = NULL; + struct annotation_options *opts = &annotate_opts; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; @@ -2713,8 +2712,7 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, - struct annotation_options *opts) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -2731,7 +2729,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, list_for_each_entry(al, ¬es->src->source, node) { if (annotation_line__filter(al, notes)) continue; - annotation_line__write(al, notes, &wops, opts); + annotation_line__write(al, notes, &wops); fputc('\n', fp); wops.first_line = false; } @@ -2739,8 +2737,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) { const char *ev_name = evsel__name(evsel); char buf[1024]; @@ -2762,7 +2759,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name, map__dso(ms->map)->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp, opts); + symbol__annotate_fprintf2(ms->sym, fp); fclose(fp); err = 0; @@ -2939,24 +2936,24 @@ void annotation__init_column_widths(struct annotation *notes, struct symbol *sym void annotation__update_column_widths(struct annotation *notes) { - if (notes->options->use_offset) + if (annotate_opts.use_offset) notes->widths.target = notes->widths.min_addr; - else if (notes->options->full_addr) + else if (annotate_opts.full_addr) notes->widths.target = BITS_PER_LONG / 4; else notes->widths.target = notes->widths.max_addr; notes->widths.addr = notes->widths.target; - if (notes->options->show_nr_jumps) + if (annotate_opts.show_nr_jumps) notes->widths.addr += notes->widths.jumps + 1; } void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) { - notes->options->full_addr = !notes->options->full_addr; + annotate_opts.full_addr = !annotate_opts.full_addr; - if (notes->options->full_addr) + if (annotate_opts.full_addr) notes->start = map__objdump_2mem(ms->map, ms->sym->start); else notes->start = map__rip_2objdump(ms->map, ms->sym->start); @@ -2965,8 +2962,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m } static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, - struct annotation_options *opts) + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -2979,7 +2975,7 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, double percent; percent = annotation_data__percent(&al->data[i], - opts->percent_type); + annotate_opts.percent_type); if (percent > percent_max) percent_max = percent; @@ -2990,22 +2986,20 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, false, true, notes->start + al->offset); - insert_source_line(&tmp_root, al, opts); + insert_source_line(&tmp_root, al); } resort_source_line(root, &tmp_root); } -static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, - struct annotation_options *opts) +static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root, opts); + annotation__calc_lines(notes, ms->map, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; @@ -3014,7 +3008,7 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, char buf[1024]; int err; - err = symbol__annotate2(ms, evsel, opts, NULL); + err = symbol__annotate2(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -3024,31 +3018,31 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, return -1; } - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout, opts); + buf, percent_type_str(annotate_opts.percent_type), sym->name, + dso->long_name); + symbol__annotate_fprintf2(sym, stdout); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) { struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; int err; - err = symbol__annotate(ms, evsel, opts, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; @@ -3060,13 +3054,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, symbol__calc_percent(sym, evsel); - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(ms, evsel, opts); + symbol__annotate_printf(ms, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -3127,7 +3121,7 @@ call_like: obj__printf(obj, " "); } - disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name); + disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, notes->widths.max_ins_name); } static void ipc_coverage_string(char *bf, int size, struct annotation *notes) @@ -3210,7 +3204,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (!notes->options->show_minmax_cycle) { + if (!annotate_opts.show_minmax_cycle) { if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); @@ -3254,7 +3248,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!*al->line) obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); else if (al->offset == -1) { - if (al->line_nr && notes->options->show_linenr) + if (al->line_nr && annotate_opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -3264,15 +3258,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati u64 addr = al->offset; int color = -1; - if (!notes->options->use_offset) + if (!annotate_opts.use_offset) addr += notes->start; - if (!notes->options->use_offset) { + if (!annotate_opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { if (al->jump_sources && - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { - if (notes->options->show_nr_jumps) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { + if (annotate_opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", notes->widths.jumps, @@ -3286,9 +3280,9 @@ print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); } else if (ins__is_call(&disasm_line(al)->ins) && - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { goto print_addr; - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", @@ -3310,19 +3304,18 @@ print_addr: } void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops, - struct annotation_options *opts) + struct annotation_write_ops *wops) { __annotation_line__write(al, notes, wops->first_line, wops->current_entry, wops->change_color, wops->width, wops->obj, - opts->percent_type, + annotate_opts.percent_type, wops->set_color, wops->set_percent_color, wops->set_jumps_percent_color, wops->printf, wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); @@ -3336,11 +3329,11 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, options, parch); + err = symbol__annotate(ms, evsel, parch); if (err) goto out_free_offsets; - notes->options = options; + notes->options = &annotate_opts; symbol__calc_percent(sym, evsel); @@ -3468,10 +3461,9 @@ static unsigned int parse_percent_type(char *str1, char *str2) return type; } -int annotate_parse_percent_type(const struct option *opt, const char *_str, +int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str, int unset __maybe_unused) { - struct annotation_options *opts = opt->value; unsigned int type; char *str1, *str2; int err = -1; @@ -3490,7 +3482,7 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str, if (type == (unsigned int) -1) type = parse_percent_type(str2, str1); if (type != (unsigned int) -1) { - opts->percent_type = type; + annotate_opts.percent_type = type; err = 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8c1a070725fa..7bf29baa43f5 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -224,8 +224,7 @@ struct annotation_write_ops { }; void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops, - struct annotation_options *opts); + struct annotation_write_ops *ops); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, @@ -375,11 +374,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); enum symbol_disassemble_errno { @@ -406,20 +403,18 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options); +int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts); +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); +int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, -- cgit v1.2.3 From 22197fb296913f83c7182befd2a8b23bf042f279 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:38 -0800 Subject: perf ui/browser/annotate: Use global annotation_options Now it can use the global options and no need save local browser options separately. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 8 ++--- tools/perf/builtin-top.c | 3 +- tools/perf/ui/browsers/annotate.c | 65 +++++++++++++++++---------------------- tools/perf/ui/browsers/hists.c | 34 ++++++++------------ tools/perf/ui/browsers/hists.h | 2 -- tools/perf/util/annotate.h | 6 ++-- tools/perf/util/block-info.c | 6 ++-- tools/perf/util/block-info.h | 3 +- tools/perf/util/hist.h | 25 +++++---------- 10 files changed, 59 insertions(+), 95 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 9c1e2b2b5bc0..d17213bd8332 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -381,7 +381,7 @@ find_next: /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL, &annotate_opts); + key = hist_entry__tui_annotate(he, evsel, NULL); switch (key) { case -1: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2b86651615cd..bc0d986c1e0c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -540,8 +540,7 @@ static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report * evlist__for_each_entry(evlist, pos) { ret = report__browse_block_hists(&rep->block_reports[i++].hist, rep->min_percent, pos, - &rep->session->header.env, - &annotate_opts); + &rep->session->header.env); if (ret != 0) return ret; } @@ -573,8 +572,7 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c if (rep->total_cycles_mode) { report__browse_block_hists(&rep->block_reports[i++].hist, - rep->min_percent, pos, - NULL, NULL); + rep->min_percent, pos, NULL); continue; } @@ -669,7 +667,7 @@ static int report__browse_hists(struct report *rep) } ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, - &session->header.env, true, &annotate_opts); + &session->header.env, true); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 46a61634701b..b5222d241983 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -646,8 +646,7 @@ repeat: } ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, - &top->session->header.env, !top->record_opts.overwrite, - &annotate_opts); + &top->session->header.env, !top->record_opts.overwrite); if (ret == K_RELOAD) { top->zero = true; goto repeat; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ed0e692afdbe..fda17c1f2031 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -27,7 +27,6 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; - struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -97,7 +96,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int struct annotation_write_ops ops = { .first_line = row == 0, .current_entry = is_current_entry, - .change_color = (!notes->options->hide_src_code && + .change_color = (!annotate_opts.hide_src_code && (!is_current_entry || (browser->use_navkeypressed && !browser->navkeypressed))), @@ -128,7 +127,7 @@ static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) while (pos && pos->al.offset == -1) { pos = list_prev_entry(pos, al.node); - if (!ab->opts->hide_src_code) + if (!annotate_opts.hide_src_code) diff++; } @@ -195,7 +194,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) return; } - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { from = cursor->al.idx_asm; to = target->idx_asm; } else { @@ -224,7 +223,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) int ret = ui_browser__list_head_refresh(browser); int pcnt_width = annotation__pcnt_width(notes); - if (notes->options->jump_arrows) + if (annotate_opts.jump_arrows) annotate_browser__draw_current_jump(browser); ui_browser__set_color(browser, HE_COLORSET_NORMAL); @@ -258,7 +257,7 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, browser->opts->percent_type) < 0) + if (disasm__cmp(al, l, annotate_opts.percent_type) < 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -294,11 +293,10 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct annotation *notes = browser__annotation(&browser->b); struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node); u32 idx = pos->idx; - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) idx = pos->idx_asm; annotate_browser__set_top(browser, pos, idx); browser->curr_hot = nd; @@ -331,7 +329,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double percent; percent = annotation_data__percent(&pos->al.data[i], - browser->opts->percent_type); + annotate_opts.percent_type); if (max_percent < percent) max_percent = percent; @@ -380,12 +378,12 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); al = list_entry(browser->b.top, struct annotation_line, node); - if (notes->options->hide_src_code) { + if (annotate_opts.hide_src_code) { if (al->idx_asm < offset) offset = al->idx; browser->b.nr_entries = notes->src->nr_entries; - notes->options->hide_src_code = false; + annotate_opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx - offset; browser->b.index = al->idx; @@ -403,7 +401,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) offset = al->idx_asm; browser->b.nr_entries = notes->src->nr_asm_entries; - notes->options->hide_src_code = true; + annotate_opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); browser->b.top_idx = al->idx_asm - offset; browser->b.index = al->idx_asm; @@ -483,8 +481,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, target_ms.map = ms->map; target_ms.sym = dl->ops.target.sym; annotation__unlock(notes); - symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts); - sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); + symbol__tui_annotate(&target_ms, evsel, hbt); + sym_title(ms->sym, ms->map, title, sizeof(title), annotate_opts.percent_type); ui_browser__show_title(&browser->b, title); return true; } @@ -659,7 +657,6 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) { - struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct map_symbol *ms = browser->priv; struct symbol *sym = ms->sym; char symbol_dso[SYM_TITLE_MAX_SIZE]; @@ -667,7 +664,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const if (ui_browser__show(browser, title, help) < 0) return -1; - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), annotate_opts.percent_type); ui_browser__gotorc_title(browser, 0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); @@ -809,7 +806,7 @@ static int annotate_browser__run(struct annotate_browser *browser, annotate_browser__show(&browser->b, title, help); continue; case 'k': - notes->options->show_linenr = !notes->options->show_linenr; + annotate_opts.show_linenr = !annotate_opts.show_linenr; continue; case 'l': annotate_browser__show_full_location (&browser->b); @@ -822,18 +819,18 @@ static int annotate_browser__run(struct annotate_browser *browser, ui_helpline__puts(help); continue; case 'o': - notes->options->use_offset = !notes->options->use_offset; + annotate_opts.use_offset = !annotate_opts.use_offset; annotation__update_column_widths(notes); continue; case 'O': - if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) - notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + if (++annotate_opts.offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + annotate_opts.offset_level = ANNOTATION__MIN_OFFSET_LEVEL; continue; case 'j': - notes->options->jump_arrows = !notes->options->jump_arrows; + annotate_opts.jump_arrows = !annotate_opts.jump_arrows; continue; case 'J': - notes->options->show_nr_jumps = !notes->options->show_nr_jumps; + annotate_opts.show_nr_jumps = !annotate_opts.show_nr_jumps; annotation__update_column_widths(notes); continue; case '/': @@ -897,15 +894,15 @@ show_sup_ins: annotation__update_column_widths(notes); continue; case 'c': - if (notes->options->show_minmax_cycle) - notes->options->show_minmax_cycle = false; + if (annotate_opts.show_minmax_cycle) + annotate_opts.show_minmax_cycle = false; else - notes->options->show_minmax_cycle = true; + annotate_opts.show_minmax_cycle = true; annotation__update_column_widths(notes); continue; case 'p': case 'b': - switch_percent_type(browser->opts, key == 'b'); + switch_percent_type(&annotate_opts, key == 'b'); hists__scnprintf_title(hists, title, sizeof(title)); annotate_browser__show(&browser->b, title, help); continue; @@ -932,26 +929,23 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { - return symbol__tui_annotate(ms, evsel, hbt, opts); + return symbol__tui_annotate(ms, evsel, hbt); } int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); - return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts) + struct hist_browser_timer *hbt) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); @@ -965,7 +959,6 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .priv = ms, .use_navkeypressed = true, }, - .opts = opts, }; struct dso *dso; int ret = -1, err; @@ -996,7 +989,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, browser.b.entries = ¬es->src->source, browser.b.width += 18; /* Percentage */ - if (notes->options->hide_src_code) + if (annotate_opts.hide_src_code) ui_browser__init_asm_mode(&browser.b); ret = annotate_browser__run(&browser, evsel, hbt); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f4812b226818..3061dea29e6b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2250,8 +2250,7 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env, - struct annotation_options *annotation_opts) + struct perf_env *env) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2259,7 +2258,6 @@ perf_evsel_browser__new(struct evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; - browser->annotation_opts = annotation_opts; } return browser; } @@ -2432,8 +2430,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!browser->annotation_opts->objdump_path && - perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) + if (!annotate_opts.objdump_path && + perf_env__lookup_objdump(browser->env, &annotate_opts.objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2445,8 +2443,7 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) else evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, - browser->annotation_opts); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2943,11 +2940,10 @@ next: static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline, bool left_exits, struct hist_browser_timer *hbt, float min_pcnt, - struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + struct perf_env *env, bool warn_lost_event) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); struct branch_info *bi = NULL; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3398,7 +3394,6 @@ out: struct evsel_menu { struct ui_browser b; struct evsel *selection; - struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3499,8 +3494,7 @@ browse_hists: hbt->timer(hbt->arg); key = evsel__hists_browse(pos, nr_events, help, true, hbt, menu->min_pcnt, menu->env, - warn_lost_event, - menu->annotation_opts); + warn_lost_event); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3557,7 +3551,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event, struct annotation_options *annotation_opts) + bool warn_lost_event) { struct evsel *pos; struct evsel_menu menu = { @@ -3572,7 +3566,6 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con }, .min_pcnt = min_pcnt, .env = env, - .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3607,8 +3600,7 @@ static bool evlist__single_entry(struct evlist *evlist) } int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_opts) + float min_pcnt, struct perf_env *env, bool warn_lost_event) { int nr_entries = evlist->core.nr_entries; @@ -3617,7 +3609,7 @@ single_entry: { struct evsel *first = evlist__first(evlist); return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event, annotation_opts); + env, warn_lost_event); } } @@ -3635,7 +3627,7 @@ single_entry: { } return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event, annotation_opts); + warn_lost_event); } static int block_hists_browser__title(struct hist_browser *browser, char *bf, @@ -3654,8 +3646,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, } int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts) + float min_percent, struct perf_env *env) { struct hists *hists = &bh->block_hists; struct hist_browser *browser; @@ -3672,7 +3663,6 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, browser->title = block_hists_browser__title; browser->min_pcnt = min_percent; browser->env = env; - browser->annotation_opts = annotation_opts; /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 1e938d9ffa5e..de46f6c56b0e 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,7 +4,6 @@ #include "ui/browser.h" -struct annotation_options; struct evsel; struct hist_browser { @@ -15,7 +14,6 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; - struct annotation_options *annotation_opts; struct evsel *block_evsel; int print_seq; bool show_dso; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7bf29baa43f5..857c5fa0e6b1 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -418,13 +418,11 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); + struct hist_browser_timer *hbt); #else static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 08f82c1f166c..dec910989701 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -464,8 +464,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +476,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 42e9dcc4cf0a..96f53e89795e 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -78,8 +78,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc..5d0db96609df 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -457,7 +457,6 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; struct res_sample; enum rstype { @@ -473,16 +472,13 @@ struct block_hist; void attr_to_script(char *buf, struct perf_event_attr *attr); int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +488,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,23 +496,20 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused) { return 0; } @@ -541,8 +533,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } -- cgit v1.2.3 From 7f929aea21fd0be5e0d9ee5827d5b809daa69f29 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:39 -0800 Subject: perf annotate: Ensure init/exit for global options Now it only cares about the global options so it can just handle it without the argument. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 8 ++++---- tools/perf/builtin-report.c | 8 ++++---- tools/perf/builtin-top.c | 8 ++++---- tools/perf/util/annotate.c | 19 +++++++++++-------- tools/perf/util/annotate.h | 8 ++++---- 5 files changed, 27 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index d17213bd8332..d880f1b039fd 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -615,13 +615,13 @@ int cmd_annotate(int argc, const char **argv) set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE); - annotation_options__init(&annotate_opts); + annotation_options__init(); ret = hists__init(); if (ret < 0) return ret; - annotation_config__init(&annotate_opts); + annotation_config__init(); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { @@ -651,7 +651,7 @@ int cmd_annotate(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&annotate_opts) < 0) + if (annotate_check_args() < 0) return -EINVAL; #ifdef HAVE_GTK2_SUPPORT @@ -732,7 +732,7 @@ out_delete: #ifndef NDEBUG perf_session__delete(annotate.session); #endif - annotation_options__exit(&annotate_opts); + annotation_options__exit(); return ret; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bc0d986c1e0c..17fb171e898b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1430,7 +1430,7 @@ int cmd_report(int argc, const char **argv) */ symbol_conf.keep_exited_threads = true; - annotation_options__init(&annotate_opts); + annotation_options__init(); ret = perf_config(report__config, &report); if (ret) @@ -1464,7 +1464,7 @@ int cmd_report(int argc, const char **argv) return -ENOMEM; } - if (annotate_check_args(&annotate_opts) < 0) { + if (annotate_check_args() < 0) { ret = -EINVAL; goto exit; } @@ -1696,7 +1696,7 @@ repeat: */ symbol_conf.priv_size += sizeof(u32); } - annotation_config__init(&annotate_opts); + annotation_config__init(); } if (symbol__init(&session->header.env) < 0) @@ -1750,7 +1750,7 @@ error: zstd_fini(&(session->zstd_data)); perf_session__delete(session); exit: - annotation_options__exit(&annotate_opts); + annotation_options__exit(); free(sort_order_help); free(field_order_help); return ret; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b5222d241983..ed83afeeced0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1608,7 +1608,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; - annotation_options__init(&annotate_opts); + annotation_options__init(); annotate_opts.min_pcnt = 5; annotate_opts.context = 4; @@ -1660,7 +1660,7 @@ int cmd_top(int argc, const char **argv) if (status) goto out_delete_evlist; - if (annotate_check_args(&annotate_opts) < 0) + if (annotate_check_args() < 0) goto out_delete_evlist; if (!top.evlist->core.nr_entries) { @@ -1786,7 +1786,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(&annotate_opts); + annotation_config__init(); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); @@ -1839,7 +1839,7 @@ int cmd_top(int argc, const char **argv) out_delete_evlist: evlist__delete(top.evlist); perf_session__delete(top.session); - annotation_options__exit(&annotate_opts); + annotation_options__exit(); return status; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index daff9af552f4..626ff3baeb85 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3416,8 +3416,10 @@ static int annotation__config(const char *var, const char *value, void *data) return 0; } -void annotation_options__init(struct annotation_options *opt) +void annotation_options__init(void) { + struct annotation_options *opt = &annotate_opts; + memset(opt, 0, sizeof(*opt)); /* Default values. */ @@ -3428,16 +3430,15 @@ void annotation_options__init(struct annotation_options *opt) opt->percent_type = PERCENT_PERIOD_LOCAL; } - -void annotation_options__exit(struct annotation_options *opt) +void annotation_options__exit(void) { - zfree(&opt->disassembler_style); - zfree(&opt->objdump_path); + zfree(&annotate_opts.disassembler_style); + zfree(&annotate_opts.objdump_path); } -void annotation_config__init(struct annotation_options *opt) +void annotation_config__init(void) { - perf_config(annotation__config, opt); + perf_config(annotation__config, &annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -3491,8 +3492,10 @@ out: return err; } -int annotate_check_args(struct annotation_options *args) +int annotate_check_args(void) { + struct annotation_options *args = &annotate_opts; + if (args->prefix_strip && !args->prefix) { pr_err("--prefix-strip requires --prefix\n"); return -1; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 857c5fa0e6b1..4283eb4522b2 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -428,14 +428,14 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, } #endif -void annotation_options__init(struct annotation_options *opt); -void annotation_options__exit(struct annotation_options *opt); +void annotation_options__init(void); +void annotation_options__exit(void); -void annotation_config__init(struct annotation_options *opt); +void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); -int annotate_check_args(struct annotation_options *args); +int annotate_check_args(void); #endif /* __PERF_ANNOTATE_H */ -- cgit v1.2.3 From 2fa21d694c63081f26444847c916e5fc83bcefa1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:40 -0800 Subject: perf annotate: Remove remaining usages of local annotation options So that it can get rid of the unused data. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 14 ++++++-------- tools/perf/util/annotate.c | 2 +- tools/perf/util/annotate.h | 6 +++--- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index fda17c1f2031..cb2eb6dcb532 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -37,11 +37,10 @@ static inline struct annotation *browser__annotation(struct ui_browser *browser) return symbol__annotation(ms->sym); } -static bool disasm_line__filter(struct ui_browser *browser, void *entry) +static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { - struct annotation *notes = browser__annotation(browser); struct annotation_line *al = list_entry(entry, struct annotation_line, node); - return annotation_line__filter(al, notes); + return annotation_line__filter(al); } static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current) @@ -269,7 +268,6 @@ static void disasm_rb_tree__insert(struct annotate_browser *browser, static void annotate_browser__set_top(struct annotate_browser *browser, struct annotation_line *pos, u32 idx) { - struct annotation *notes = browser__annotation(&browser->b); unsigned back; ui_browser__refresh_dimensions(&browser->b); @@ -279,7 +277,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, while (browser->b.top_idx != 0 && back != 0) { pos = list_entry(pos->node.prev, struct annotation_line, node); - if (annotation_line__filter(pos, notes)) + if (annotation_line__filter(pos)) continue; --browser->b.top_idx; @@ -498,7 +496,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows list_for_each_entry(pos, ¬es->src->source, al.node) { if (pos->al.offset == offset) return pos; - if (!annotation_line__filter(&pos->al, notes)) + if (!annotation_line__filter(&pos->al)) ++*idx; } @@ -542,7 +540,7 @@ struct annotation_line *annotate_browser__find_string(struct annotate_browser *b *idx = browser->b.index; list_for_each_entry_continue(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; ++*idx; @@ -579,7 +577,7 @@ struct annotation_line *annotate_browser__find_string_reverse(struct annotate_br *idx = browser->b.index; list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; --*idx; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 626ff3baeb85..09c399ab0384 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2727,7 +2727,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) struct annotation_line *al; list_for_each_entry(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; annotation_line__write(al, notes, &wops); fputc('\n', fp); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 4283eb4522b2..6d5a6bb49a97 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -320,7 +320,7 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->branch && notes->options->show_minmax_cycle) + if (notes->branch && annotate_opts.show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; @@ -331,9 +331,9 @@ static inline int annotation__pcnt_width(struct annotation *notes) return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } -static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +static inline bool annotation_line__filter(struct annotation_line *al) { - return notes->options->hide_src_code && al->offset == -1; + return annotate_opts.hide_src_code && al->offset == -1; } void annotation__set_offsets(struct annotation *notes, s64 size); -- cgit v1.2.3 From 327f7533cc596427b62f431c8852951412b6c0dc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 28 Nov 2023 09:54:41 -0800 Subject: perf annotate: Get rid of local annotation options It doesn't need the option in the struct annotation which is allocated for each symbol. It can directly use the global options and save 8 bytes per symbol. Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231128175441.721579-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 -- tools/perf/util/annotate.h | 1 - 2 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 09c399ab0384..c81fa0791918 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3333,8 +3333,6 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (err) goto out_free_offsets; - notes->options = &annotate_opts; - symbol__calc_percent(sym, evsel); annotation__set_offsets(notes, size); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6d5a6bb49a97..589f8aaf0236 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -294,7 +294,6 @@ struct annotated_branch { struct LOCKABLE annotation { u64 start; - struct annotation_options *options; int nr_events; int max_jump_sources; struct { -- cgit v1.2.3 From e28bd359bcc8eb849aaa475f3c3f9705fba26d6e Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Wed, 6 Dec 2023 23:11:49 -0500 Subject: bpf: Add verifier regression test for previous patch Add a regression test for var-off zero-sized reads. Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20231207041150.229139-3-andreimatei1@gmail.com --- .../testing/selftests/bpf/progs/verifier_var_off.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index 83a90afba785..b7bdd7db3a35 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -224,6 +224,35 @@ __naked void access_max_out_of_bound(void) : __clobber_all); } +/* Similar to the test above, but this time check the special case of a + * zero-sized stack access. We used to have a bug causing crashes for zero-sized + * out-of-bounds accesses. + */ +SEC("socket") +__description("indirect variable-offset stack access, zero-sized, max out of bound") +__failure __msg("invalid variable-offset indirect access to stack R1") +__naked void zero_sized_access_max_out_of_bound(void) +{ + asm volatile (" \ + r0 = 0; \ + /* Fill some stack */ \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + /* Get an unknown value */ \ + r1 = *(u32*)(r1 + 0); \ + r1 &= 63; \ + r1 += -16; \ + /* r1 is now anywhere in [-16,48) */ \ + r1 += r10; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + SEC("lwt_in") __description("indirect variable-offset stack access, min out of bound") __failure __msg("invalid variable-offset indirect access to stack R2") -- cgit v1.2.3 From 4624a78c18c62da815f3253966b7a87995f77e1b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:53 +0800 Subject: selftests/net: convert test_bridge_backup_port.sh to run it in unique namespace There is no h1 h2 actually. Remove it. Here is the test result after conversion. ]# ./test_bridge_backup_port.sh Backup port ----------- TEST: Forwarding out of swp1 [ OK ] TEST: No forwarding out of vx0 [ OK ] TEST: swp1 carrier off [ OK ] TEST: No forwarding out of swp1 [ OK ] ... Backup nexthop ID - ping ------------------------ TEST: Ping with backup nexthop ID [ OK ] TEST: Ping after disabling backup nexthop ID [ OK ] Backup nexthop ID - torture test -------------------------------- TEST: Torture test [ OK ] Tests passed: 83 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_bridge_backup_port.sh | 371 ++++++++++----------- 1 file changed, 182 insertions(+), 189 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 112cfd8a10ad..70a7d87ba2d2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -35,9 +35,8 @@ # | sw1 | | sw2 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # All tests in this script. Can be overridden with -t option. TESTS=" @@ -132,9 +131,6 @@ setup_topo_ns() { local ns=$1; shift - ip netns add $ns - ip -n $ns link set dev lo up - ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 @@ -145,13 +141,14 @@ setup_topo() { local ns - for ns in sw1 sw2; do + setup_ns sw1 sw2 + for ns in $sw1 $sw2; do setup_topo_ns $ns done ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns sw1 name veth0 - ip link set dev veth1 netns sw2 name veth0 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 } setup_sw_common() @@ -190,7 +187,7 @@ setup_sw_common() setup_sw1() { - local ns=sw1 + local ns=$sw1 local local_addr=192.0.2.33 local remote_addr=192.0.2.34 local veth_addr=192.0.2.49 @@ -203,7 +200,7 @@ setup_sw1() setup_sw2() { - local ns=sw2 + local ns=$sw2 local local_addr=192.0.2.34 local remote_addr=192.0.2.33 local veth_addr=192.0.2.50 @@ -229,11 +226,7 @@ setup() cleanup() { - local ns - - for ns in h1 h2 sw1 sw2; do - ip netns del $ns &> /dev/null - done + cleanup_ns $sw1 $sw2 } ################################################################################ @@ -248,85 +241,85 @@ backup_port() echo "Backup port" echo "-----------" - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" # Initial state - check that packets are forwarded out of swp1 when it # has a carrier and not forwarded out of any port when it does not have # a carrier. - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" # Configure vx0 as the backup port of swp1 and check that packets are # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 # does not have a carrier. - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 3 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. - run_cmd "bridge -n sw1 link set dev swp1 nobackup_port" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 1 "vx0 not configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" } @@ -339,125 +332,125 @@ backup_nhid() echo "Backup nexthop ID" echo "-----------------" - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" - run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo" + run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo" # The first filter matches on packets forwarded using the backup # nexthop ID and the second filter matches on packets forwarded using a # regular VXLAN FDB entry. - run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" # Configure vx0 as the backup port of swp1 and check that packets are # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 # does not have a carrier. When packets are forwarded out of vx0, check # that they are forwarded by the VXLAN FDB entry. - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 0 + tc_check_packets $sw2 "dev vx0 ingress" 101 0 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "Forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check # that when packets are forwarded out of vx0, they are forwarded using # the backup nexthop ID. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "Forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 3 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" # Reset the backup nexthop ID to 0 and check that packets are no longer # forwarded using the backup nexthop ID when swp1 does not have a # carrier and are instead forwarded by the VXLAN FDB. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\"" log_test $? 1 "No backup nexthop ID configured for swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 2 + tc_check_packets $sw2 "dev vx0 ingress" 102 2 log_test $? 0 "Forwarding using VXLAN FDB entry" } @@ -475,109 +468,109 @@ backup_nhid_invalid() # is forwarded out of the VXLAN port, but dropped by the VXLAN driver # and does not crash the host. - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" # Drop all other Tx traffic to avoid changes to Tx drop counter. - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" - tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') + tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" # First, check that redirection works. - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" log_test $? 0 "Valid nexthop as backup nexthop" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "Forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" log_test $? 0 "No Tx drop increase" # Use a non-existent nexthop ID. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" log_test $? 0 "Non-existent nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" log_test $? 0 "Tx drop increased" # Use a blckhole nexthop. - run_cmd "ip -n sw1 nexthop replace id 30 blackhole" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" + run_cmd "ip -n $sw1 nexthop replace id 30 blackhole" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" log_test $? 0 "Blackhole nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" log_test $? 0 "Tx drop increased" # Non-group FDB nexthop. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" log_test $? 0 "Non-group FDB nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" log_test $? 0 "Tx drop increased" # IPv6 address family nexthop. - run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" - run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" - run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" + run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" log_test $? 0 "IPv6 address family nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" log_test $? 0 "Tx drop increased" } @@ -591,44 +584,44 @@ backup_nhid_ping() echo "------------------------" # Test bidirectional traffic when traffic is redirected in both VTEPs. - sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]') - sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]') + sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]') + sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" - run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" - run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" - run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" + run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" + run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb" - run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb" - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10" - run_cmd "ip -n sw1 link set dev swp1 carrier off" - run_cmd "ip -n sw2 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw2 link set dev swp1 carrier off" - run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" log_test $? 0 "Ping with backup nexthop ID" # Reset the backup nexthop ID to 0 and check that ping fails. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" - run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0" - run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" log_test $? 1 "Ping after disabling backup nexthop ID" } backup_nhid_add_del_loop() { while true; do - ip -n sw1 nexthop del id 10 - ip -n sw1 nexthop replace id 10 group 1/2 fdb + ip -n $sw1 nexthop del id 10 + ip -n $sw1 nexthop replace id 10 group 1/2 fdb done >/dev/null 2>&1 } @@ -648,19 +641,19 @@ backup_nhid_torture() # deleting the group. The test is considered successful if nothing # crashed. - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" backup_nhid_add_del_loop & pid1=$! - ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & + ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & pid2=$! sleep 30 -- cgit v1.2.3 From 312abe3d93a35f9c486a4703d39cab52457266f0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:54 +0800 Subject: selftests/net: convert test_bridge_neigh_suppress.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_bridge_neigh_suppress.sh Per-port ARP suppression - VLAN 10 ---------------------------------- TEST: arping [ OK ] TEST: ARP suppression [ OK ] ... TEST: NS suppression (VLAN 20) [ OK ] Tests passed: 148 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_bridge_neigh_suppress.sh | 331 ++++++++++----------- 1 file changed, 162 insertions(+), 169 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index d80f2cd87614..8533393a4f18 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -45,9 +45,8 @@ # | sw1 | | sw2 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # All tests in this script. Can be overridden with -t option. TESTS=" @@ -140,9 +139,6 @@ setup_topo_ns() { local ns=$1; shift - ip netns add $ns - ip -n $ns link set dev lo up - ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 @@ -153,21 +149,22 @@ setup_topo() { local ns - for ns in h1 h2 sw1 sw2; do + setup_ns h1 h2 sw1 sw2 + for ns in $h1 $h2 $sw1 $sw2; do setup_topo_ns $ns done ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h1 name eth0 - ip link set dev veth1 netns sw1 name swp1 + ip link set dev veth0 netns $h1 name eth0 + ip link set dev veth1 netns $sw1 name swp1 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns sw1 name veth0 - ip link set dev veth1 netns sw2 name veth0 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h2 name eth0 - ip link set dev veth1 netns sw2 name swp1 + ip link set dev veth0 netns $h2 name eth0 + ip link set dev veth1 netns $sw2 name swp1 } setup_host_common() @@ -190,7 +187,7 @@ setup_host_common() setup_h1() { - local ns=h1 + local ns=$h1 local v4addr1=192.0.2.1/28 local v4addr2=192.0.2.17/28 local v6addr1=2001:db8:1::1/64 @@ -201,7 +198,7 @@ setup_h1() setup_h2() { - local ns=h2 + local ns=$h2 local v4addr1=192.0.2.2/28 local v4addr2=192.0.2.18/28 local v6addr1=2001:db8:1::2/64 @@ -254,7 +251,7 @@ setup_sw_common() setup_sw1() { - local ns=sw1 + local ns=$sw1 local local_addr=192.0.2.33 local remote_addr=192.0.2.34 local veth_addr=192.0.2.49 @@ -265,7 +262,7 @@ setup_sw1() setup_sw2() { - local ns=sw2 + local ns=$sw2 local local_addr=192.0.2.34 local remote_addr=192.0.2.33 local veth_addr=192.0.2.50 @@ -291,11 +288,7 @@ setup() cleanup() { - local ns - - for ns in h1 h2 sw1 sw2; do - ip netns del $ns &> /dev/null - done + cleanup_ns $h1 $h2 $sw1 $sw2 } ################################################################################ @@ -312,80 +305,80 @@ neigh_suppress_arp_common() echo "Per-port ARP suppression - VLAN $vid" echo "----------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" # Initial state - check that ARP requests are not suppressed and that # ARP replies are received. - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. - h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" # Install a neighbor on the matching SVI interface and check that ARP # requests are suppressed. - run_cmd "ip -n sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" # Take the second host down and check that ARP requests are suppressed # and that ARP replies are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 0 "H2 up" # Disable neighbor suppression and check that ARP requests are no # longer suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "ARP suppression" # Take the second host down and check that ARP requests are not # suppressed and that ARP replies are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 1 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "ARP suppression" } @@ -415,80 +408,80 @@ neigh_suppress_ns_common() echo "Per-port NS suppression - VLAN $vid" echo "---------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" # Initial state - check that NS messages are not suppressed and that ND # messages are received. - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. - h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Install a neighbor on the matching SVI interface and check that NS # messages are suppressed. - run_cmd "ip -n sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are suppressed # and that ND messages are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 0 "H2 up" # Disable neighbor suppression and check that NS messages are no longer # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are not # suppressed and that ND messages are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 2 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "NS suppression" } @@ -524,118 +517,118 @@ neigh_vlan_suppress_arp() echo "Per-{Port, VLAN} ARP suppression" echo "--------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor suppression and check that ARP # requests are not suppressed and that ARP replies are received. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN ARP requests are suppressed. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that ARP requests # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs ARP requests are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "ARP suppression (VLAN $vid2)" } @@ -655,118 +648,118 @@ neigh_vlan_suppress_ns() echo "Per-{Port, VLAN} NS suppression" echo "-------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor suppression and check that NS # messages are not suppressed and that ND messages are received. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN NS messages are suppressed. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that NS messages # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs NS messages are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "NS suppression (VLAN $vid2)" } -- cgit v1.2.3 From a8258e64ca74314478fda85a42b1c1eb2db29c67 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:55 +0800 Subject: selftests/net: convert test_vxlan_mdb.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_mdb.sh Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay -------------------------------------------------------------------- TEST: MDB entry addition [ OK ] ... Data path: MDB torture test - IPv6 overlay / IPv6 underlay ---------------------------------------------------------- TEST: Torture test [ OK ] Tests passed: 620 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/test_vxlan_mdb.sh | 202 +++++++++++++------------- 1 file changed, 99 insertions(+), 103 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6e996f8063cd..6725fd9157b9 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -55,9 +55,8 @@ # | ns2_v4 | | ns2_v6 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 CONTROL_PATH_TESTS=" basic_star_g_ipv4_ipv4 @@ -260,9 +259,6 @@ setup_common() local local_addr1=$1; shift local local_addr2=$1; shift - ip netns add $ns1 - ip netns add $ns2 - ip link add name veth0 type veth peer name veth1 ip link set dev veth0 netns $ns1 name veth0 ip link set dev veth1 netns $ns2 name veth0 @@ -273,36 +269,36 @@ setup_common() setup_v4() { - setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + setup_ns ns1_v4 ns2_v4 + setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2 - ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 - ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0 - ip -n ns1_v4 route add default via 192.0.2.18 - ip -n ns2_v4 route add default via 192.0.2.17 + ip -n $ns1_v4 route add default via 192.0.2.18 + ip -n $ns2_v4 route add default via 192.0.2.17 } cleanup_v4() { - ip netns del ns2_v4 - ip netns del ns1_v4 + cleanup_ns $ns2_v4 $ns1_v4 } setup_v6() { - setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + setup_ns ns1_v6 ns2_v6 + setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2 - ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad - ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad - ip -n ns1_v6 route add default via 2001:db8:2::2 - ip -n ns2_v6 route add default via 2001:db8:2::1 + ip -n $ns1_v6 route add default via 2001:db8:2::2 + ip -n $ns2_v6 route add default via 2001:db8:2::1 } cleanup_v6() { - ip netns del ns2_v6 - ip netns del ns1_v6 + cleanup_ns $ns2_v6 $ns1_v6 } setup() @@ -433,7 +429,7 @@ basic_common() basic_star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1" local vtep_ip=198.51.100.100 @@ -446,7 +442,7 @@ basic_star_g_ipv4_ipv4() basic_star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1" local vtep_ip=198.51.100.100 @@ -459,7 +455,7 @@ basic_star_g_ipv6_ipv4() basic_star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1" local vtep_ip=2001:db8:1000::1 @@ -472,7 +468,7 @@ basic_star_g_ipv4_ipv6() basic_star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1" local vtep_ip=2001:db8:1000::1 @@ -485,7 +481,7 @@ basic_star_g_ipv6_ipv6() basic_sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=198.51.100.100 @@ -498,7 +494,7 @@ basic_sg_ipv4_ipv4() basic_sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=198.51.100.100 @@ -511,7 +507,7 @@ basic_sg_ipv6_ipv4() basic_sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=2001:db8:1000::1 @@ -524,7 +520,7 @@ basic_sg_ipv4_ipv6() basic_sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=2001:db8:1000::1 @@ -694,7 +690,7 @@ star_g_common() star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -711,7 +707,7 @@ star_g_ipv4_ipv4() star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -728,7 +724,7 @@ star_g_ipv6_ipv4() star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -745,7 +741,7 @@ star_g_ipv4_ipv6() star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -793,7 +789,7 @@ sg_common() sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local src=192.0.2.129 local vtep_ip=198.51.100.100 @@ -808,7 +804,7 @@ sg_ipv4_ipv4() sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=198.51.100.100 @@ -823,7 +819,7 @@ sg_ipv6_ipv4() sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src=192.0.2.129 local vtep_ip=2001:db8:1000::1 @@ -838,7 +834,7 @@ sg_ipv4_ipv6() sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=2001:db8:1000::1 @@ -918,7 +914,7 @@ dump_common() dump_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. local fn=ipv4_grps_get @@ -932,7 +928,7 @@ dump_ipv4_ipv4() dump_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. local fn=ipv6_grps_get @@ -946,7 +942,7 @@ dump_ipv6_ipv4() dump_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv4_grps_get @@ -960,7 +956,7 @@ dump_ipv4_ipv6() dump_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv6_grps_get @@ -1072,8 +1068,8 @@ encap_params_common() encap_params_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1091,8 +1087,8 @@ encap_params_ipv4_ipv4() encap_params_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1110,8 +1106,8 @@ encap_params_ipv6_ipv4() encap_params_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1129,8 +1125,8 @@ encap_params_ipv4_ipv6() encap_params_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1208,8 +1204,8 @@ starg_exclude_ir_common() starg_exclude_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1227,8 +1223,8 @@ starg_exclude_ir_ipv4_ipv4() starg_exclude_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1246,8 +1242,8 @@ starg_exclude_ir_ipv6_ipv4() starg_exclude_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1265,8 +1261,8 @@ starg_exclude_ir_ipv4_ipv6() starg_exclude_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1344,8 +1340,8 @@ starg_include_ir_common() starg_include_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1363,8 +1359,8 @@ starg_include_ir_ipv4_ipv4() starg_include_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1382,8 +1378,8 @@ starg_include_ir_ipv6_ipv4() starg_include_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1401,8 +1397,8 @@ starg_include_ir_ipv4_ipv6() starg_include_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1462,8 +1458,8 @@ starg_exclude_p2mp_common() starg_exclude_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 @@ -1480,8 +1476,8 @@ starg_exclude_p2mp_ipv4_ipv4() starg_exclude_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 @@ -1498,8 +1494,8 @@ starg_exclude_p2mp_ipv6_ipv4() starg_exclude_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 @@ -1516,8 +1512,8 @@ starg_exclude_p2mp_ipv4_ipv6() starg_exclude_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 @@ -1576,8 +1572,8 @@ starg_include_p2mp_common() starg_include_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 @@ -1594,8 +1590,8 @@ starg_include_p2mp_ipv4_ipv4() starg_include_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 @@ -1612,8 +1608,8 @@ starg_include_p2mp_ipv6_ipv4() starg_include_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 @@ -1630,8 +1626,8 @@ starg_include_p2mp_ipv4_ipv6() starg_include_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 @@ -1709,8 +1705,8 @@ egress_vni_translation_common() egress_vni_translation_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv4" @@ -1727,8 +1723,8 @@ egress_vni_translation_ipv4_ipv4() egress_vni_translation_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv6" @@ -1745,8 +1741,8 @@ egress_vni_translation_ipv6_ipv4() egress_vni_translation_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv4" @@ -1763,8 +1759,8 @@ egress_vni_translation_ipv4_ipv6() egress_vni_translation_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv6" @@ -1929,8 +1925,8 @@ all_zeros_mdb_common() all_zeros_mdb_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.101 local vtep2_ip=198.51.100.102 local vtep3_ip=198.51.100.103 @@ -1947,8 +1943,8 @@ all_zeros_mdb_ipv4() all_zeros_mdb_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local vtep3_ip=2001:db8:3000::1 @@ -2021,8 +2017,8 @@ mdb_fdb_common() mdb_fdb_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -2040,8 +2036,8 @@ mdb_fdb_ipv4_ipv4() mdb_fdb_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -2059,8 +2055,8 @@ mdb_fdb_ipv6_ipv4() mdb_fdb_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -2078,8 +2074,8 @@ mdb_fdb_ipv4_ipv6() mdb_fdb_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -2166,7 +2162,7 @@ mdb_torture_common() mdb_torture_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 @@ -2183,7 +2179,7 @@ mdb_torture_ipv4_ipv4() mdb_torture_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 @@ -2200,7 +2196,7 @@ mdb_torture_ipv6_ipv4() mdb_torture_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 @@ -2217,7 +2213,7 @@ mdb_torture_ipv4_ipv6() mdb_torture_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 -- cgit v1.2.3 From d79e907b425d1dcc831f9eddbdb09682292faed0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:56 +0800 Subject: selftests/net: convert test_vxlan_nolocalbypass.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_nolocalbypass.sh TEST: localbypass enabled [ OK ] TEST: Packet received by local VXLAN device - localbypass [ OK ] TEST: localbypass disabled [ OK ] TEST: Packet not received by local VXLAN device - nolocalbypass [ OK ] TEST: localbypass enabled [ OK ] TEST: Packet received by local VXLAN device - localbypass [ OK ] Tests passed: 6 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_vxlan_nolocalbypass.sh | 48 +++++++++++----------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh index f75212bf142c..b8805983b728 100755 --- a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh @@ -9,9 +9,8 @@ # option and verifies that packets are no longer received by the second VXLAN # device. +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 TESTS=" nolocalbypass @@ -98,20 +97,19 @@ tc_check_packets() setup() { - ip netns add ns1 + setup_ns ns1 - ip -n ns1 link set dev lo up - ip -n ns1 address add 192.0.2.1/32 dev lo - ip -n ns1 address add 198.51.100.1/32 dev lo + ip -n $ns1 address add 192.0.2.1/32 dev lo + ip -n $ns1 address add 198.51.100.1/32 dev lo - ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ + ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ dstport 4789 nolearning - ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790 + ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790 } cleanup() { - ip netns del ns1 &> /dev/null + cleanup_ns $ns1 } ################################################################################ @@ -122,40 +120,40 @@ nolocalbypass() local smac=00:01:02:03:04:05 local dmac=00:0a:0b:0c:0d:0e - run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" + run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" - run_cmd "tc -n ns1 qdisc add dev vx1 clsact" - run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $ns1 qdisc add dev vx1 clsact" + run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n ns1 qdisc add dev lo clsact" - run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" + run_cmd "tc -n $ns1 qdisc add dev lo clsact" + run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet received by local VXLAN device - localbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" log_test $? 0 "localbypass disabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 2 + tc_check_packets "$ns1" "dev vx1 ingress" 101 2 log_test $? 0 "Packet received by local VXLAN device - localbypass" } -- cgit v1.2.3 From d6aab1f632978880660f41c48b760dd48461dcfb Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:57 +0800 Subject: selftests/net: convert test_vxlan_under_vrf.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_under_vrf.sh Checking HV connectivity [ OK ] Check VM connectivity through VXLAN (underlay in the default VRF) [ OK ] Check VM connectivity through VXLAN (underlay in a VRF) [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/net/test_vxlan_under_vrf.sh | 70 +++++++++++----------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 1fd1250ebc66..ae8fbe3f0779 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -43,15 +43,14 @@ # This tests both the connectivity between vm-1 and vm-2, and that the underlay # can be moved in and out of the vrf by unsetting and setting veth0's master. +source lib.sh set -e cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del veth-tap 2>/dev/null || true - for ns in hv-1 hv-2 vm-1 vm-2; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2 } # Clean start @@ -60,72 +59,75 @@ cleanup &> /dev/null [[ $1 == "clean" ]] && exit 0 trap cleanup EXIT +setup_ns hv_1 hv_2 vm_1 vm_2 +hv[1]=$hv_1 +hv[2]=$hv_2 +vm[1]=$vm_1 +vm[2]=$vm_2 # Setup "Hypervisors" simulated with netns ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking() { - hv=$1 + id=$1 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 - ip -netns hv-$hv link add vrf-underlay type vrf table 1 - ip -netns hv-$hv link set vrf-underlay up - ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 - ip -netns hv-$hv link set veth0 up + ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1 + ip -netns ${hv[$id]} link set vrf-underlay up + ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0 + ip -netns ${hv[$id]} link set veth0 up - ip -netns hv-$hv link add br0 type bridge - ip -netns hv-$hv link set br0 up + ip -netns ${hv[$id]} link add br0 type bridge + ip -netns ${hv[$id]} link set br0 up - ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 - ip -netns hv-$hv link set vxlan0 master br0 - ip -netns hv-$hv link set vxlan0 up + ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789 + ip -netns ${hv[$id]} link set vxlan0 master br0 + ip -netns ${hv[$id]} link set vxlan0 up } setup-hv-networking 1 setup-hv-networking 2 # Check connectivity between HVs by pinging hv-2 from hv-1 echo -n "Checking HV connectivity " -ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Setups a "VM" simulated by a netns an a veth pair setup-vm() { id=$1 - ip netns add vm-$id ip link add veth-tap type veth peer name veth-hv - ip link set veth-tap netns hv-$id - ip -netns hv-$id link set veth-tap master br0 - ip -netns hv-$id link set veth-tap up + ip link set veth-tap netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-tap master br0 + ip -netns ${hv[$id]} link set veth-tap up ip link set veth-hv address 02:1d:8d:dd:0c:6$id - ip link set veth-hv netns vm-$id - ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv - ip -netns vm-$id link set veth-hv up + ip link set veth-hv netns ${vm[$id]} + ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv + ip -netns ${vm[$id]} link set veth-hv up } setup-vm 1 setup-vm 2 # Setup VTEP routes to make ARP work -bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent -bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent +bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Move the underlay to a non-default VRF -ip -netns hv-1 link set veth0 vrf vrf-underlay -ip -netns hv-1 link set vxlan0 down -ip -netns hv-1 link set vxlan0 up -ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set vxlan0 down -ip -netns hv-2 link set vxlan0 up +ip -netns $hv_1 link set veth0 vrf vrf-underlay +ip -netns $hv_1 link set vxlan0 down +ip -netns $hv_1 link set vxlan0 up +ip -netns $hv_2 link set veth0 vrf vrf-underlay +ip -netns $hv_2 link set vxlan0 down +ip -netns $hv_2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" -- cgit v1.2.3 From 5ece8371747d57ae113aaf8a7b6468d6681d099f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:58 +0800 Subject: selftests/net: convert test_vxlan_vnifiltering.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_vnifiltering.sh TEST: Create traditional vxlan device [ OK ] TEST: Cannot create vnifilter device without external flag [ OK ] TEST: Creating external vxlan device with vnifilter flag [ OK ] ... TEST: VM connectivity over traditional vxlan (ipv6 default rdst) [ OK ] TEST: VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst) [ OK ] Tests passed: 27 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_vxlan_vnifiltering.sh | 154 +++++++++++++-------- 1 file changed, 95 insertions(+), 59 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 8c3ac0a72545..6127a78ee988 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -78,10 +78,8 @@ # # # This test tests the new vxlan vnifiltering api - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -148,18 +146,18 @@ run_cmd() } check_hv_connectivity() { - ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null sleep 1 - ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12" + run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22" + run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" } @@ -167,26 +165,23 @@ cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true - for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32 } trap cleanup EXIT setup-hv-networking() { - hv=$1 + id=$1 local1=$2 mask1=$3 local2=$4 mask2=$5 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 - ip -netns hv-$hv addr add $local1/$mask1 dev veth0 - ip -netns hv-$hv addr add $local2/$mask2 dev veth0 - ip -netns hv-$hv link set veth0 up + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 + ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0 + ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0 + ip -netns ${hv[$id]} link set veth0 up } # Setups a "VM" simulated by a netns an a veth pair @@ -208,21 +203,20 @@ setup-vm() { lastvxlandev="" # create bridge - ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ + ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ mcast_snooping 0 - ip -netns hv-$hvid link set br$brid up + ip -netns ${hv[$hvid]} link set br$brid up # create vm namespace and interfaces and connect to hypervisor # namespace - ip netns add vm-$vmid hvvethif="vethhv-$vmid" vmvethif="veth-$vmid" ip link add $hvvethif type veth peer name $vmvethif - ip link set $hvvethif netns hv-$hvid - ip link set $vmvethif netns vm-$vmid - ip -netns hv-$hvid link set $hvvethif up - ip -netns vm-$vmid link set $vmvethif up - ip -netns hv-$hvid link set $hvvethif master br$brid + ip link set $hvvethif netns ${hv[$hvid]} + ip link set $vmvethif netns ${vm[$vmid]} + ip -netns ${hv[$hvid]} link set $hvvethif up + ip -netns ${vm[$vmid]} link set $vmvethif up + ip -netns ${hv[$hvid]} link set $hvvethif master br$brid # configure VM vlan/vni filtering on hypervisor for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ') @@ -234,9 +228,9 @@ setup-vm() { local vtype=$(echo $vmap | awk -F'-' '{print ($5)}') local port=$(echo $vmap | awk -F'-' '{print ($6)}') - ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid - ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid - ip -netns vm-$vmid link set $vmvethif.$vid up + ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid + ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid + ip -netns ${vm[$vmid]} link set $vmvethif.$vid up tid=$vid vxlandev="vxlan$brid" @@ -268,35 +262,35 @@ setup-vm() { # create vxlan device if [ "$vxlandev" != "$lastvxlandev" ]; then - ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null - ip -netns hv-$hvid link set $vxlandev master br$brid - ip -netns hv-$hvid link set $vxlandev up + ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null + ip -netns ${hv[$hvid]} link set $vxlandev master br$brid + ip -netns ${hv[$hvid]} link set $vxlandev up lastvxlandev=$vxlandev fi # add vlan - bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif - bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev + bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif + bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev # Add bridge vni filter for tx if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then - bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on - bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid + bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on + bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid fi if [[ -n $vtype && $vtype == "metadata" ]]; then - bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \ + bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \ src_vni $tid vni $tid dst $group self elif [[ -n $vtype && $vtype == "vnifilter" ]]; then # Add per vni rx filter with 'bridge vni' api - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group fi fi fi @@ -306,14 +300,14 @@ setup-vm() { setup_vnifilter_api() { ip link add veth-host type veth peer name veth-testns - ip netns add testns - ip link set veth-testns netns testns + setup_ns testns + ip link set veth-testns netns $testns } cleanup_vnifilter_api() { ip link del veth-host 2>/dev/null || true - ip netns del testns 2>/dev/null || true + ip netns del $testns 2>/dev/null || true } # tests vxlan filtering api @@ -331,52 +325,52 @@ vxlan_vnifilter_api() # Duplicate vni test # create non-vnifiltering traditional vni device - run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" log_test $? 0 "Create traditional vxlan device" # create vni filtering device - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" log_test $? 1 "Cannot create vnifilter device without external flag" - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Creating external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100" log_test $? 0 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200" log_test $? 0 "Set new vni id on vnifiltering device" - run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Create second external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200" log_test $? 255 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Set new vni id on vnifiltering device" # check in bridge vni show - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Update vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400" log_test $? 0 "Add new vni id on vnifiltering device" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group" log_test $? 0 "Set multicast group on existing vni" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group" log_test $? 0 "Set multicast group on existing vni" # set vnifilter on an existing external vxlan device - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter" log_test $? 2 "Cannot set vnifilter flag on a device" # change vxlan vnifilter flag - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter" log_test $? 2 "Cannot unset vnifilter flag on a device" } @@ -390,12 +384,20 @@ vxlan_vnifilter_datapath() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 @@ -415,12 +417,20 @@ vxlan_vnifilter_datapath_pervni() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0 @@ -440,12 +450,20 @@ vxlan_vnifilter_datapath_mgroup() group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1 setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1 @@ -464,12 +482,20 @@ vxlan_vnifilter_datapath_mgroup_pervni() group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1 setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1 @@ -486,12 +512,22 @@ vxlan_vnifilter_metadata_and_traditional_mix() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[31]=$vm_31 + vm[12]=$vm_12 + vm[22]=$vm_22 + vm[32]=$vm_32 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0 @@ -504,13 +540,13 @@ vxlan_vnifilter_metadata_and_traditional_mix() check_vm_connectivity "vnifiltering vxlan pervni remote mix" # check VM connectivity over traditional/non-vxlan filtering vxlan devices - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32" log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32" log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32" log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)" } -- cgit v1.2.3 From bedc99abcaf88df25b044fc4e3c80d69d0dbfc9b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:59 +0800 Subject: selftests/net: convert vrf_route_leaking.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf_route_leaking.sh ########################################################################### IPv4 (sym route): VRF ICMP ttl error route lookup ping ########################################################################### TEST: Basic IPv4 connectivity [ OK ] TEST: Ping received ICMP ttl exceeded [ OK ] ... TEST: Basic IPv6 connectivity [ OK ] TEST: Traceroute6 reports a hop on r1 [ OK ] Tests passed: 18 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/vrf_route_leaking.sh | 201 +++++++++++------------ 1 file changed, 96 insertions(+), 105 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index dedc52562b4f..2da32f4c479b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -58,6 +58,7 @@ # to send an ICMP error back to the source when the ttl of a packet reaches 1 # while it is forwarded between different vrfs. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -171,11 +172,7 @@ run_cmd_grep() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_vrf() @@ -212,72 +209,69 @@ setup_sym() # # create nodes as namespaces - # - for ns in h1 h2 r1; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 + for ns in $h1 $h2 $r1; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up # # h1 # - ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 up + ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 up + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 up # h2 to h1 via r1 - ip -netns h2 route add default via ${R1_N2_IP} dev eth0 - ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + ip -netns $h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # Route leak from red to blue - ip -netns r1 route add vrf red ${H1_N1} dev blue - ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + ip -netns $r1 route add vrf red ${H1_N1} dev blue + ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue # Wait for ip config to settle @@ -293,90 +287,87 @@ setup_asym() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h1 link add eth1 type veth peer name r2h1 - ip -netns h1 link set r2h1 netns r2 name eth0 up + ip -netns $h1 link add eth1 type veth peer name r2h1 + ip -netns $h1 link set r2h1 netns $r2 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up - ip -netns h2 link add eth1 type veth peer name r2h2 - ip -netns h2 link set r2h2 netns r2 name eth1 up + ip -netns $h2 link add eth1 type veth peer name r2h2 + ip -netns $h2 link set r2h2 netns $r2 name eth1 up # # h1 # - ip -netns h1 link add br0 type bridge - ip -netns h1 link set br0 up - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 master br0 up - ip -netns h1 link set eth1 master br0 up + ip -netns $h1 link add br0 type bridge + ip -netns $h1 link set br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 master br0 up + ip -netns $h1 link set eth1 master br0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 # # h2 # - ip -netns h2 link add br0 type bridge - ip -netns h2 link set br0 up - ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 master br0 up - ip -netns h2 link set eth1 master br0 up + ip -netns $h2 link add br0 type bridge + ip -netns $h2 link set br0 up + ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 master br0 up + ip -netns $h2 link set eth1 master br0 up # h2 to h1 via r2 - ip -netns h2 route add default via ${R2_N2_IP} dev br0 - ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + ip -netns $h2 route add default via ${R2_N2_IP} dev br0 + ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # No route leak from red to blue # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle sleep 2 @@ -384,14 +375,14 @@ setup_asym() check_connectivity() { - ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 log_test $? 0 "Basic IPv4 connectivity" return $? } check_connectivity6() { - ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 log_test $? 0 "Basic IPv6 connectivity" return $? } @@ -426,7 +417,7 @@ ipv4_traceroute() check_connectivity || return - run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP} log_test $? 0 "Traceroute reports a hop on r1" } @@ -449,7 +440,7 @@ ipv6_traceroute() check_connectivity6 || return - run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6} log_test $? 0 "Traceroute6 reports a hop on r1" } @@ -470,7 +461,7 @@ ipv4_ping_ttl() check_connectivity || return - run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP ttl exceeded" } @@ -491,7 +482,7 @@ ipv4_ping_frag() check_connectivity || return - run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP Frag needed" } @@ -512,7 +503,7 @@ ipv6_ping_ttl() check_connectivity6 || return - run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Hop limit" } @@ -533,7 +524,7 @@ ipv6_ping_frag() check_connectivity6 || return - run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Packet too big" } -- cgit v1.2.3 From 51f64acbe36e13e113d284fcdefc751216984c01 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:08:00 +0800 Subject: selftests/net: convert vrf_strict_mode_test.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf_strict_mode_test.sh ################################################################################ TEST SECTION: VRF strict_mode test on init network namespace ################################################################################ TEST: init: net.vrf.strict_mode is available [ OK ] TEST: init: strict_mode=0 by default, 0 vrfs [ OK ] ... TEST: init: check strict_mode=1 [ OK ] TEST: testns-HvoZkB: check strict_mode=0 [ OK ] Tests passed: 37 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- .../testing/selftests/net/vrf_strict_mode_test.sh | 47 ++++++++++------------ 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh index 417d214264f3..01552b542544 100755 --- a/tools/testing/selftests/net/vrf_strict_mode_test.sh +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -3,9 +3,7 @@ # This test is designed for testing the new VRF strict_mode functionality. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh ret=0 # identifies the "init" network namespace which is often called root network @@ -247,13 +245,12 @@ setup() { modprobe vrf - ip netns add testns - ip netns exec testns ip link set lo up + setup_ns testns } cleanup() { - ip netns del testns 2>/dev/null + ip netns del $testns 2>/dev/null ip link del vrf100 2>/dev/null ip link del vrf101 2>/dev/null @@ -298,28 +295,28 @@ vrf_strict_mode_tests_testns() { log_section "VRF strict_mode test on testns network namespace" - vrf_strict_mode_check_support testns + vrf_strict_mode_check_support $testns - strict_mode_check_default testns + strict_mode_check_default $testns - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns - add_vrf_and_check testns vrf100 100 - config_vrf_and_check testns 10.0.100.1/24 vrf100 + add_vrf_and_check $testns vrf100 100 + config_vrf_and_check $testns 10.0.100.1/24 vrf100 - add_vrf_and_check_fail testns vrf101 100 + add_vrf_and_check_fail $testns vrf101 100 - add_vrf_and_check_fail testns vrf102 100 + add_vrf_and_check_fail $testns vrf102 100 - add_vrf_and_check testns vrf200 200 + add_vrf_and_check $testns vrf200 200 - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns - add_vrf_and_check testns vrf101 100 + add_vrf_and_check $testns vrf101 100 - add_vrf_and_check testns vrf102 100 + add_vrf_and_check $testns vrf102 100 - #the strict_mode is disabled in the testns + #the strict_mode is disabled in the $testns } vrf_strict_mode_tests_mix() @@ -328,25 +325,25 @@ vrf_strict_mode_tests_mix() read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 - del_vrf_and_check testns vrf101 + del_vrf_and_check $testns vrf101 - del_vrf_and_check testns vrf102 + del_vrf_and_check $testns vrf102 disable_strict_mode_and_check init - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns enable_strict_mode_and_check init enable_strict_mode_and_check init - disable_strict_mode_and_check testns - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns + disable_strict_mode_and_check $testns read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 } ################################################################################ -- cgit v1.2.3 From 61b12ebe439adfd9853c13499c2099e2a6d41771 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:08:01 +0800 Subject: selftests/net: convert vrf-xfrm-tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf-xfrm-tests.sh No qdisc on VRF device TEST: IPv4 no xfrm policy [ OK ] TEST: IPv6 no xfrm policy [ OK ] TEST: IPv4 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy with VRF in selector [ OK ] TEST: IPv4 xfrm policy with xfrm device [ OK ] TEST: IPv6 xfrm policy with xfrm device [ OK ] netem qdisc on VRF device TEST: IPv4 no xfrm policy [ OK ] TEST: IPv6 no xfrm policy [ OK ] TEST: IPv4 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy with VRF in selector [ OK ] TEST: IPv4 xfrm policy with xfrm device [ OK ] TEST: IPv6 xfrm policy with xfrm device [ OK ] Tests passed: 14 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/vrf-xfrm-tests.sh | 77 +++++++++++++-------------- 1 file changed, 36 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh index 452638ae8aed..b64dd891699d 100755 --- a/tools/testing/selftests/net/vrf-xfrm-tests.sh +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -3,9 +3,7 @@ # # Various combinations of VRF with xfrms and qdisc. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -67,7 +65,7 @@ run_cmd_host1() printf " COMMAND: $cmd\n" fi - out=$(eval ip netns exec host1 $cmd 2>&1) + out=$(eval ip netns exec $host1 $cmd 2>&1) rc=$? if [ "$VERBOSE" = "1" ]; then if [ -n "$out" ]; then @@ -116,9 +114,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -177,25 +172,25 @@ connect_ns() cleanup() { - ip netns del host1 - ip netns del host2 + cleanup_ns $host1 $host2 } setup() { - create_ns "host1" - create_ns "host2" + setup_ns host1 host2 + create_ns "$host1" + create_ns "$host2" - connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ - "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 + connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ + "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 - create_vrf "host1" ${VRF} ${TABLE} - ip -netns host1 link set dev eth0 master ${VRF} + create_vrf "$host1" ${VRF} ${TABLE} + ip -netns $host1 link set dev eth0 master ${VRF} } cleanup_xfrm() { - for ns in host1 host2 + for ns in $host1 $host2 do for x in state policy do @@ -218,57 +213,57 @@ setup_xfrm() # # host1 - IPv4 out - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h1_4} dst ${h2_4} ${devarg} dir out \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host2 - IPv4 in - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h1_4} dst ${h2_4} dir in \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host1 - IPv4 in - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h2_4} dst ${h1_4} ${devarg} dir in \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host2 - IPv4 out - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h2_4} dst ${h1_4} dir out \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host1 - IPv6 out - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h1_6} dst ${h2_6} ${devarg} dir out \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host2 - IPv6 in - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h1_6} dst ${h2_6} dir in \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host1 - IPv6 in - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h2_6} dst ${h1_6} ${devarg} dir in \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # host2 - IPv6 out - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h2_6} dst ${h1_6} dir out \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # # state # - ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -276,14 +271,14 @@ setup_xfrm() sel src ${h1_4} dst ${h2_4} - ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -291,14 +286,14 @@ setup_xfrm() sel src ${h2_4} dst ${h1_4} - ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -306,14 +301,14 @@ setup_xfrm() sel src ${h1_6} dst ${h2_6} - ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -323,22 +318,22 @@ setup_xfrm() cleanup_xfrm_dev() { - ip -netns host1 li del xfrm0 - ip -netns host2 addr del ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr del ${XFRM2_6}/64 dev eth0 + ip -netns $host1 li del xfrm0 + ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0 } setup_xfrm_dev() { local vrfarg="vrf ${VRF}" - ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID} - ip -netns host1 li set xfrm0 ${vrfarg} up - ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0 - ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0 + ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID} + ip -netns $host1 li set xfrm0 ${vrfarg} up + ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0 + ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0 - ip -netns host2 addr add ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr add ${XFRM2_6}/64 dev eth0 + ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0 setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}" } -- cgit v1.2.3 From 8b7b0e5fe47de90ba6c350f9abece589fb637f79 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 8 Dec 2023 00:17:03 -0600 Subject: bpf: Load vmlinux btf for any struct_ops map In libbpf, when determining whether we need to load vmlinux btf, we're currently (among other things) checking whether there is any struct_ops program present in the object. This works for most realistic struct_ops maps, as a struct_ops map is of course typically composed of one or more struct_ops programs. However, that technically need not be the case. A struct_ops interface could be defined which allows a map to be specified which one or more non-prog fields, and which provides default behavior if no struct_ops progs is actually provided otherwise. For sched_ext, for example, you technically only need to specify the name of the scheduler in the struct_ops map, with the core scheduler logic providing default behavior if no prog is actually specified. If we were to define and try to load such a struct_ops map, we would crash in libbpf when initializing it as obj->btf_vmlinux will be NULL: Reading symbols from minimal... (gdb) r Starting program: minimal_example [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. 0x000055555558308c in btf__type_cnt (btf=0x0) at btf.c:612 612 return btf->start_id + btf->nr_types; (gdb) bt type_name=0x5555555d99e3 "sched_ext_ops", kind=4) at btf.c:914 kind=4) at btf.c:942 type=0x7fffffffe558, type_id=0x7fffffffe548, ... data_member=0x7fffffffe568) at libbpf.c:948 kern_btf=0x0) at libbpf.c:1017 at libbpf.c:8059 So as to account for such bare-bones struct_ops maps, let's update obj_needs_vmlinux_btf() to also iterate over an obj's maps and check whether any of them are struct_ops maps. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20231208061704.400463-1-void@manifault.com --- tools/lib/bpf/libbpf.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ea9b8158c20d..ac54ebc0629f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3054,9 +3054,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } +static bool map_needs_vmlinux_btf(struct bpf_map *map) +{ + return bpf_map__is_struct_ops(map); +} + static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { struct bpf_program *prog; + struct bpf_map *map; int i; /* CO-RE relocations need kernel BTF, only when btf_custom_path @@ -3081,6 +3087,11 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) return true; } + bpf_object__for_each_map(map, obj) { + if (map_needs_vmlinux_btf(map)) + return true; + } + return false; } -- cgit v1.2.3 From 79eba8c924f7decfa71ddf187d38cb9f5f2cd7b3 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:42 +0200 Subject: selftests/sgx: Fix uninitialized pointer dereference in error path Ensure ctx is zero-initialized, such that the encl_measure function will not call EVP_MD_CTX_destroy with an uninitialized ctx pointer in case of an early error during key generation. Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX") Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/20231005153854.25566-2-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/sigstruct.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index a07896a46364..d73b29becf5b 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -318,9 +318,9 @@ bool encl_measure(struct encl *encl) struct sgx_sigstruct *sigstruct = &encl->sigstruct; struct sgx_sigstruct_payload payload; uint8_t digest[SHA256_DIGEST_LENGTH]; + EVP_MD_CTX *ctx = NULL; unsigned int siglen; RSA *key = NULL; - EVP_MD_CTX *ctx; int i; memset(sigstruct, 0, sizeof(*sigstruct)); @@ -384,7 +384,8 @@ bool encl_measure(struct encl *encl) return true; err: - EVP_MD_CTX_destroy(ctx); + if (ctx) + EVP_MD_CTX_destroy(ctx); RSA_free(key); return false; } -- cgit v1.2.3 From b84fc2e0139ba4b23b8039bd7cfd242894fe8f8b Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:43 +0200 Subject: selftests/sgx: Fix uninitialized pointer dereferences in encl_get_entry Ensure sym_tab and sym_names are zero-initialized and add an early-out condition in the unlikely (erroneous) case that the enclave ELF file would not contain a symbol table. This addresses -Werror=maybe-uninitialized compiler warnings for gcc -O2. Fixes: 33c5aac3bf32 ("selftests/sgx: Test complete changing of page type flow") Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231005153854.25566-3-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/load.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 94bdeac1cf04..c9f658e44de6 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -136,11 +136,11 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) */ uint64_t encl_get_entry(struct encl *encl, const char *symbol) { + Elf64_Sym *symtab = NULL; + char *sym_names = NULL; Elf64_Shdr *sections; - Elf64_Sym *symtab; Elf64_Ehdr *ehdr; - char *sym_names; - int num_sym; + int num_sym = 0; int i; ehdr = encl->bin; @@ -161,6 +161,9 @@ uint64_t encl_get_entry(struct encl *encl, const char *symbol) } } + if (!symtab || !sym_names) + return 0; + for (i = 0; i < num_sym; i++) { Elf64_Sym *sym = &symtab[i]; -- cgit v1.2.3 From 853a57a43ebdb8c024160c1a0990bae85f4bcc2f Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:44 +0200 Subject: selftests/sgx: Include memory clobber for inline asm in test enclave Add the "memory" clobber to the EMODPE and EACCEPT asm blocks to tell the compiler the assembly code accesses to the secinfo struct. This ensures the compiler treats the asm block as a memory barrier and the write to secinfo will be visible to ENCLU. Fixes: 20404a808593 ("selftests/sgx: Add test for EPCM permission changes") Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Kai Huang Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231005153854.25566-4-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index c0d6397295e3..ae791df3e5a5 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -24,10 +24,11 @@ static void do_encl_emodpe(void *_op) secinfo.flags = op->flags; asm volatile(".byte 0x0f, 0x01, 0xd7" - : + : /* no outputs */ : "a" (EMODPE), "b" (&secinfo), - "c" (op->epc_addr)); + "c" (op->epc_addr) + : "memory" /* read from secinfo pointer */); } static void do_encl_eaccept(void *_op) @@ -42,7 +43,8 @@ static void do_encl_eaccept(void *_op) : "=a" (rax) : "a" (EACCEPT), "b" (&secinfo), - "c" (op->epc_addr)); + "c" (op->epc_addr) + : "memory" /* read from secinfo pointer */); op->ret = rax; } -- cgit v1.2.3 From f79464658d858e07af0c4c9b30f5baedeeae0c04 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:45 +0200 Subject: selftests/sgx: Separate linker options Fixes "'linker' input unused [-Wunused-command-line-argument]" errors when compiling with clang. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231005153854.25566-5-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/Makefile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 50aab6b57da3..dcdd04b322f8 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -12,9 +12,11 @@ OBJCOPY := $(CROSS_COMPILE)objcopy endif INCLUDES := -I$(top_srcdir)/tools/include -HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack -ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC +HOST_LDFLAGS := -z noexecstack -lcrypto +ENCL_CFLAGS += -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ -fno-stack-protector -mrdrnd $(INCLUDES) +ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx TEST_FILES := $(OUTPUT)/test_encl.elf @@ -28,7 +30,7 @@ $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \ $(OUTPUT)/sigstruct.o \ $(OUTPUT)/call.o \ $(OUTPUT)/sign_key.o - $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto + $(CC) $(HOST_CFLAGS) -o $@ $^ $(HOST_LDFLAGS) $(OUTPUT)/main.o: main.c $(CC) $(HOST_CFLAGS) -c $< -o $@ @@ -45,8 +47,8 @@ $(OUTPUT)/call.o: call.S $(OUTPUT)/sign_key.o: sign_key.S $(CC) $(HOST_CFLAGS) -c $< -o $@ -$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S - $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none +$(OUTPUT)/test_encl.elf: test_encl.c test_encl_bootstrap.S + $(CC) $(ENCL_CFLAGS) $^ -o $@ $(ENCL_LDFLAGS) EXTRA_CLEAN := \ $(OUTPUT)/test_encl.elf \ -- cgit v1.2.3 From 304b259e63b94c4f825e0648b33b15f8962955c7 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:46 +0200 Subject: selftests/sgx: Specify freestanding environment for enclave compilation Use -ffreestanding to assert the enclave compilation targets a freestanding environment (i.e., without "main" or standard libraries). This fixes clang reporting "undefined reference to `memset'" after erroneously optimizing away the provided memset/memcpy implementations. Still need to instruct the linker from using standard system startup functions, but drop -nostartfiles as it is implied by -nostdlib. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/20231005153854.25566-6-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index dcdd04b322f8..7eb890bdd3f0 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -14,7 +14,7 @@ endif INCLUDES := -I$(top_srcdir)/tools/include HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC HOST_LDFLAGS := -z noexecstack -lcrypto -ENCL_CFLAGS += -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ +ENCL_CFLAGS += -Wall -Werror -static -nostdlib -ffreestanding -fPIC \ -fno-stack-protector -mrdrnd $(INCLUDES) ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none -- cgit v1.2.3 From 4f812df8f37463767c2a74c2bd77de94acdb2be6 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:47 +0200 Subject: selftests/sgx: Remove redundant enclave base address save/restore Remove redundant push/pop pair that stores and restores the enclave base address in the test enclave, as it is never used after the pop and can anyway be easily retrieved via the __encl_base symbol. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/20231005153854.25566-7-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl_bootstrap.S | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 03ae0f57e29d..e0ce993d3f2c 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -55,12 +55,9 @@ encl_entry_core: push %rax push %rcx # push the address after EENTER - push %rbx # push the enclave base address call encl_body - pop %rbx # pop the enclave base address - /* Clear volatile GPRs, except RAX (EEXIT function). */ xor %rcx, %rcx xor %rdx, %rdx -- cgit v1.2.3 From f7884e732841450de36c2b1ed49b91e8e854a7d1 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:48 +0200 Subject: selftests/sgx: Produce static-pie executable for test enclave The current combination of -static and -fPIC creates a static executable with position-dependent addresses for global variables. Use -static-pie and -fPIE to create a proper static position independent executable that can be loaded at any address without a dynamic linker. When building the original "lea (encl_stack)(%rbx), %rax" assembly code with -static-pie -fPIE, the linker complains about a relocation it cannot resolve: /usr/local/bin/ld: /tmp/cchIWyfG.o: relocation R_X86_64_32S against `.data' can not be used when making a PIE object; recompile with -fPIE collect2: error: ld returned 1 exit status Thus, since only RIP-relative addressing is legit for local symbols, use "encl_stack(%rip)" and declare an explicit "__encl_base" symbol at the start of the linker script to be able to calculate the stack address relative to the current TCS in the enclave assembly entry code. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/f9c24d89-ed72-7d9e-c650-050d722c6b04@cs.kuleuven.be/ Link: https://lore.kernel.org/all/20231005153854.25566-8-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/Makefile | 2 +- tools/testing/selftests/sgx/test_encl.lds | 1 + tools/testing/selftests/sgx/test_encl_bootstrap.S | 9 ++++++--- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 7eb890bdd3f0..8d2ba6adc92b 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -14,7 +14,7 @@ endif INCLUDES := -I$(top_srcdir)/tools/include HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC HOST_LDFLAGS := -z noexecstack -lcrypto -ENCL_CFLAGS += -Wall -Werror -static -nostdlib -ffreestanding -fPIC \ +ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index a1ec64f7d91f..62d37160f59b 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -10,6 +10,7 @@ PHDRS SECTIONS { . = 0; + __encl_base = .; .tcs : { *(.tcs*) } : tcs diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index e0ce993d3f2c..28fe5d2ac0af 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -42,9 +42,12 @@ encl_entry: # RBX contains the base address for TCS, which is the first address # inside the enclave for TCS #1 and one page into the enclave for - # TCS #2. By adding the value of encl_stack to it, we get - # the absolute address for the stack. - lea (encl_stack)(%rbx), %rax + # TCS #2. First make it relative by substracting __encl_base and + # then add the address of encl_stack to get the address for the stack. + lea __encl_base(%rip), %rax + sub %rax, %rbx + lea encl_stack(%rip), %rax + add %rbx, %rax jmp encl_entry_core encl_dyn_entry: # Entry point for dynamically created TCS page expected to follow -- cgit v1.2.3 From d06978e8e47a348c0d33462a8c2bf8f46d2b7df5 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:49 +0200 Subject: selftests/sgx: Handle relocations in test enclave Static-pie binaries normally include a startup routine to perform any ELF relocations from .rela.dyn. Since the enclave loading process is different and glibc is not included, do the necessary relocation for encl_op_array entries manually at runtime relative to the enclave base to ensure correct function pointers. When keeping encl_op_array as a local variable on the stack, gcc without optimizations generates code that explicitly gets the right function addresses and stores them to create the array on the stack: encl_body: /* snipped */ lea do_encl_op_put_to_buf(%rip), %rax mov %rax, -0x50(%rbp) lea do_encl_op_get_from_buf(%rip), %rax mov %rax,-0x48(%rbp) lea do_encl_op_put_to_addr(%rip), %rax /* snipped */ However, gcc -Os or clang generate more efficient code that initializes encl_op_array by copying a "prepared copy" containing the absolute addresses of the functions (i.e., relative to the image base starting from 0) generated by the compiler/linker: encl_body: /* snipped */ lea prepared_copy(%rip), %rsi lea -0x48(%rsp), %rdi mov $0x10,%ecx rep movsl %ds:(%rsi),%es:(%rdi) /* snipped */ When building the enclave with -static-pie, the compiler/linker includes relocation entries for the function symbols in the "prepared copy": Relocation section '.rela.dyn' at offset 0x4000 contains 12 entries: Offset Info Type Symbol /* snipped; "prepared_copy" starts at 0x6000 */ 000000006000 000000000008 R_X86_64_RELATIVE 000000006008 000000000008 R_X86_64_RELATIVE 000000006010 000000000008 R_X86_64_RELATIVE 000000006018 000000000008 R_X86_64_RELATIVE 000000006020 000000000008 R_X86_64_RELATIVE 000000006028 000000000008 R_X86_64_RELATIVE 000000006030 000000000008 R_X86_64_RELATIVE 000000006038 000000000008 R_X86_64_RELATIVE Static-pie binaries normally include a glibc "_dl_relocate_static_pie" routine that will perform these relocations as part of the startup. However, since the enclave loading process is different and glibc is not included, we cannot rely on these relocations to be performed. Without relocations, the code would erroneously jump to the _absolute_ function address loaded from the local copy. Thus, declare "encl_op_array" as global and manually relocate the loaded function-pointer entries relative to the enclave base at runtime. This generates the following code: encl_body: /* snipped */ lea encl_op_array(%rip), %rcx lea __encl_base(%rip), %rax add (%rcx,%rdx,8),%rax jmp *%rax Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/150d8ca8-2c66-60d1-f9fc-8e6279824e94@cs.kuleuven.be/ Link: https://lore.kernel.org/all/5c22de5a-4b3b-1f38-9771-409b4ec7f96d@cs.kuleuven.be/#r Link: https://lore.kernel.org/all/20231005153854.25566-9-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl.c | 50 +++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index ae791df3e5a5..649604c526e7 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -121,21 +121,41 @@ static void do_encl_op_nop(void *_op) } +/* + * Symbol placed at the start of the enclave image by the linker script. + * Declare this extern symbol with visibility "hidden" to ensure the compiler + * does not access it through the GOT and generates position-independent + * addressing as __encl_base(%rip), so we can get the actual enclave base + * during runtime. + */ +extern const uint8_t __attribute__((visibility("hidden"))) __encl_base; + +typedef void (*encl_op_t)(void *); +static const encl_op_t encl_op_array[ENCL_OP_MAX] = { + do_encl_op_put_to_buf, + do_encl_op_get_from_buf, + do_encl_op_put_to_addr, + do_encl_op_get_from_addr, + do_encl_op_nop, + do_encl_eaccept, + do_encl_emodpe, + do_encl_init_tcs_page, +}; + void encl_body(void *rdi, void *rsi) { - const void (*encl_op_array[ENCL_OP_MAX])(void *) = { - do_encl_op_put_to_buf, - do_encl_op_get_from_buf, - do_encl_op_put_to_addr, - do_encl_op_get_from_addr, - do_encl_op_nop, - do_encl_eaccept, - do_encl_emodpe, - do_encl_init_tcs_page, - }; - - struct encl_op_header *op = (struct encl_op_header *)rdi; - - if (op->type < ENCL_OP_MAX) - (*encl_op_array[op->type])(op); + struct encl_op_header *header = (struct encl_op_header *)rdi; + encl_op_t op; + + if (header->type >= ENCL_OP_MAX) + return; + + /* + * The enclave base address needs to be added, as this call site + * *cannot be* made rip-relative by the compiler, or fixed up by + * any other possible means. + */ + op = ((uint64_t)&__encl_base) + encl_op_array[header->type]; + + (*op)(header); } -- cgit v1.2.3 From 9fd552ee32c6c1e27c125016b87d295bea6faea7 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:50 +0200 Subject: selftests/sgx: Fix linker script asserts DEFINED only considers symbols, not section names. Hence, replace the check for .got.plt with the _GLOBAL_OFFSET_TABLE_ symbol and remove other (non-essential) asserts. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231005153854.25566-10-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl.lds | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index 62d37160f59b..6ffdfc9fb4cf 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -35,8 +35,4 @@ SECTIONS } } -ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves") -ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves") +ASSERT(!DEFINED(_GLOBAL_OFFSET_TABLE_), "Libcalls through GOT are not supported in enclaves") -- cgit v1.2.3 From a4c39ef4ed43103caef80029cd30427a9ff342d8 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:51 +0200 Subject: selftests/sgx: Ensure test enclave buffer is entirely preserved Attach the "used" attribute to instruct the compiler to preserve the static encl_buffer, even if it appears it is not entirely referenced in the enclave code, as expected by the external tests manipulating page permissions. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/a2732938-f3db-a0af-3d68-a18060f66e79@cs.kuleuven.be/ Link: https://lore.kernel.org/all/20231005153854.25566-11-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/defines.h | 1 + tools/testing/selftests/sgx/test_encl.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index d8587c971941..b8f482667ce1 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -13,6 +13,7 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) +#define __used __attribute__((used)) #include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 649604c526e7..7465f121fb74 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -5,11 +5,12 @@ #include "defines.h" /* - * Data buffer spanning two pages that will be placed first in .data - * segment. Even if not used internally the second page is needed by - * external test manipulating page permissions. + * Data buffer spanning two pages that will be placed first in the .data + * segment. Even if not used internally the second page is needed by external + * test manipulating page permissions, so mark encl_buffer as "used" to make + * sure it is entirely preserved by the compiler. */ -static uint8_t encl_buffer[8192] = { 1 }; +static uint8_t __used encl_buffer[8192] = { 1 }; enum sgx_enclu_function { EACCEPT = 0x5, -- cgit v1.2.3 From 022416496008bd51cb7b33975cc0008749329a86 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:52 +0200 Subject: selftests/sgx: Ensure expected location of test enclave buffer The external tests manipulating page permissions expect encl_buffer to be placed at the start of the test enclave's .data section. As this is not guaranteed per the C standard, explicitly place encl_buffer in a separate section that is explicitly placed at the start of the .data segment in the linker script to avoid the compiler placing it somewhere else in .data. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Acked-by: Kai Huang Link: https://lore.kernel.org/all/20231005153854.25566-12-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/defines.h | 1 + tools/testing/selftests/sgx/test_encl.c | 8 ++++---- tools/testing/selftests/sgx/test_encl.lds | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index b8f482667ce1..402f8787a71c 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -14,6 +14,7 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) #define __used __attribute__((used)) +#define __section(x)__attribute__((__section__(x))) #include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 7465f121fb74..2c4d709cce2d 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -6,11 +6,11 @@ /* * Data buffer spanning two pages that will be placed first in the .data - * segment. Even if not used internally the second page is needed by external - * test manipulating page permissions, so mark encl_buffer as "used" to make - * sure it is entirely preserved by the compiler. + * segment via the linker script. Even if not used internally the second page + * is needed by external test manipulating page permissions, so mark + * encl_buffer as "used" to make sure it is entirely preserved by the compiler. */ -static uint8_t __used encl_buffer[8192] = { 1 }; +static uint8_t __used __section(".data.encl_buffer") encl_buffer[8192] = { 1 }; enum sgx_enclu_function { EACCEPT = 0x5, diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index 6ffdfc9fb4cf..333a3e78fdc9 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -24,6 +24,7 @@ SECTIONS } : text .data : { + *(.data.encl_buffer) *(.data*) } : data -- cgit v1.2.3 From ec44ca1e34bc3a9864a8c1bf8636066ec6d5d2e5 Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:53 +0200 Subject: selftests/sgx: Discard unsupported ELF sections Building the test enclave with -static-pie may produce a dynamic symbol table, but this is not supported for enclaves and any relocations need to happen manually (e.g., as for "encl_op_array"). Thus, opportunistically discard ".dyn*" and ".gnu.hash" which the enclave loader cannot handle. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231005153854.25566-13-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl.lds | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index 333a3e78fdc9..ffe851a1cac4 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -33,6 +33,8 @@ SECTIONS *(.note*) *(.debug*) *(.eh_frame*) + *(.dyn*) + *(.gnu.hash) } } -- cgit v1.2.3 From 886c5be0b12e89b2905c26c4f24d50ae91f261da Mon Sep 17 00:00:00 2001 From: Jo Van Bulck Date: Thu, 5 Oct 2023 17:38:54 +0200 Subject: selftests/sgx: Remove incomplete ABI sanitization code in test enclave As the selftest enclave is *not* intended for production, simplify the code by not initializing CPU configuration registers as expected by the ABI on enclave entry or cleansing caller-save registers on enclave exit. Signed-off-by: Jo Van Bulck Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/da0cfb1e-e347-f7f2-ac72-aec0ee0d867d@intel.com/ Link: https://lore.kernel.org/all/20231005153854.25566-14-jo.vanbulck%40cs.kuleuven.be --- tools/testing/selftests/sgx/test_encl_bootstrap.S | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 28fe5d2ac0af..d8c4ac94e032 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -59,21 +59,11 @@ encl_entry_core: push %rcx # push the address after EENTER + # NOTE: as the selftest enclave is *not* intended for production, + # simplify the code by not initializing ABI registers on entry or + # cleansing caller-save registers on exit. call encl_body - /* Clear volatile GPRs, except RAX (EEXIT function). */ - xor %rcx, %rcx - xor %rdx, %rdx - xor %rdi, %rdi - xor %rsi, %rsi - xor %r8, %r8 - xor %r9, %r9 - xor %r10, %r10 - xor %r11, %r11 - - # Reset status flags. - add %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1 - # Prepare EEXIT target by popping the address of the instruction after # EENTER to RBX. pop %rbx -- cgit v1.2.3 From 981cf568a8644161c2f15c02278ebc2834b51ba6 Mon Sep 17 00:00:00 2001 From: Zhao Mengmeng Date: Tue, 5 Dec 2023 21:56:05 -0500 Subject: selftests/sgx: Skip non X86_64 platform When building whole selftests on arm64, rsync gives an erorr about sgx: rsync: [sender] link_stat "/root/linux-next/tools/testing/selftests/sgx/test_encl.elf" failed: No such file or directory (2) rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1327) [sender=3.2.5] The root casue is sgx only used on X86_64, and shall be skipped on other platforms. Fix this by moving TEST_CUSTOM_PROGS and TEST_FILES inside the if check, then the build result will be "Skipping non-existent dir: sgx". Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX") Signed-off-by: Zhao Mengmeng Signed-off-by: Dave Hansen Reviewed-by: Jarkko Sakkinen Link: https://lore.kernel.org/all/20231206025605.3965302-1-zhaomzhao%40126.com --- tools/testing/selftests/sgx/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 8d2ba6adc92b..867f88ce2570 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -18,10 +18,10 @@ ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none +ifeq ($(CAN_BUILD_X86_64), 1) TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx TEST_FILES := $(OUTPUT)/test_encl.elf -ifeq ($(CAN_BUILD_X86_64), 1) all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf endif -- cgit v1.2.3 From 4a073e813477be4ae95dfd23cb08baf36e93a29f Mon Sep 17 00:00:00 2001 From: angquan yu Date: Tue, 28 Nov 2023 16:11:05 -0600 Subject: KVM: selftests: Actually print out magic token in NX hugepages skip message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass MAGIC_TOKEN to __TEST_REQUIRE() when printing the help message about needing to pass a magic value to manually run the NX hugepages test, otherwise the help message will contain garbage. In file included from x86_64/nx_huge_pages_test.c:15: x86_64/nx_huge_pages_test.c: In function ‘main’: include/test_util.h:40:32: error: format ‘%d’ expects a matching ‘int’ argument [-Werror=format=] 40 | ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \ | ^~~~ x86_64/nx_huge_pages_test.c:259:9: note: in expansion of macro ‘__TEST_REQUIRE’ 259 | __TEST_REQUIRE(token == MAGIC_TOKEN, | ^~~~~~~~~~~~~~ Signed-off-by: angquan yu Link: https://lore.kernel.org/r/20231128221105.63093-1-angquan21@gmail.com [sean: rewrite shortlog+changelog] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 18ac5c1952a3..83e25bccc139 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -259,7 +259,7 @@ int main(int argc, char **argv) __TEST_REQUIRE(token == MAGIC_TOKEN, "This test must be run with the magic token %d.\n" "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test."); + "also handles environment setup for the test.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); run_test(reclaim_period_ms, true, reboot_permissions); -- cgit v1.2.3 From 96f124015f825a4a186b8497e20cf87f6519c7b4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 28 Oct 2023 20:34:53 +0100 Subject: KVM: selftests: add -MP to CFLAGS Using -MD without -MP causes build failures when a header file is deleted or moved. With -MP, the compiler will emit phony targets for the header files it lists as dependencies, and the Makefiles won't refuse to attempt to rebuild a C unit which no longer includes the deleted header. Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/9fc8b5395321abbfcaf5d78477a9a7cd350b08e4.camel@infradead.org Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 52c59bad7213..963435959a92 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -224,7 +224,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end -MD\ + -Wno-gnu-variable-sized-type-not-at-end -MD -MP \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -- cgit v1.2.3 From 849c1816436fe359e85587fba5b69ddd3a957b31 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Dec 2023 13:46:22 -0500 Subject: KVM: selftests: fix supported_flags for aarch64 KVM/Arm supports readonly memslots; fix the calculation of supported_flags in set_memory_region_test.c, otherwise the test fails. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/set_memory_region_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 6637a0845acf..dfd1d1e22da3 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -333,9 +333,11 @@ static void test_invalid_memory_region_flags(void) struct kvm_vm *vm; int r, i; -#ifdef __x86_64__ +#if defined __aarch64__ || defined __x86_64__ supported_flags |= KVM_MEM_READONLY; +#endif +#ifdef __x86_64__ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) vm = vm_create_barebones_protected_vm(); else -- cgit v1.2.3 From 6b4a64bafd107e521c01eec3453ce94a3fb38529 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 7 Dec 2023 22:25:18 -0500 Subject: bpf: Fix accesses to uninit stack slots Privileged programs are supposed to be able to read uninitialized stack memory (ever since 6715df8d5) but, before this patch, these accesses were permitted inconsistently. In particular, accesses were permitted above state->allocated_stack, but not below it. In other words, if the stack was already "large enough", the access was permitted, but otherwise the access was rejected instead of being allowed to "grow the stack". This undesired rejection was happening in two places: - in check_stack_slot_within_bounds() - in check_stack_range_initialized() This patch arranges for these accesses to be permitted. A bunch of tests that were relying on the old rejection had to change; all of them were changed to add also run unprivileged, in which case the old behavior persists. One tests couldn't be updated - global_func16 - because it can't run unprivileged for other reasons. This patch also fixes the tracking of the stack size for variable-offset reads. This second fix is bundled in the same commit as the first one because they're inter-related. Before this patch, writes to the stack using registers containing a variable offset (as opposed to registers with fixed, known values) were not properly contributing to the function's needed stack size. As a result, it was possible for a program to verify, but then to attempt to read out-of-bounds data at runtime because a too small stack had been allocated for it. Each function tracks the size of the stack it needs in bpf_subprog_info.stack_depth, which is maintained by update_stack_depth(). For regular memory accesses, check_mem_access() was calling update_state_depth() but it was passing in only the fixed part of the offset register, ignoring the variable offset. This was incorrect; the minimum possible value of that register should be used instead. This tracking is now fixed by centralizing the tracking of stack size in grow_stack_state(), and by lifting the calls to grow_stack_state() to check_stack_access_within_bounds() as suggested by Andrii. The code is now simpler and more convincingly tracks the correct maximum stack size. check_stack_range_initialized() can now rely on enough stack having been allocated for the access; this helps with the fix for the first issue. A few tests were changed to also check the stack depth computation. The one that fails without this patch is verifier_var_off:stack_write_priv_vs_unpriv. Fixes: 01f810ace9ed3 ("bpf: Allow variable-offset stack access") Reported-by: Hao Sun Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231208032519.260451-3-andreimatei1@gmail.com Closes: https://lore.kernel.org/bpf/CABWLsev9g8UP_c3a=1qbuZUi20tGoUXoU07FPf-5FLvhOKOY+Q@mail.gmail.com/ --- kernel/bpf/verifier.c | 65 +++++++++------------- tools/testing/selftests/bpf/progs/iters.c | 2 +- .../selftests/bpf/progs/test_global_func16.c | 2 +- .../selftests/bpf/progs/verifier_basic_stack.c | 8 +-- .../testing/selftests/bpf/progs/verifier_int_ptr.c | 5 +- .../selftests/bpf/progs/verifier_raw_stack.c | 5 +- .../testing/selftests/bpf/progs/verifier_var_off.c | 62 +++++++++++++++++---- .../selftests/bpf/verifier/atomic_cmpxchg.c | 11 ---- tools/testing/selftests/bpf/verifier/calls.c | 4 +- 9 files changed, 92 insertions(+), 72 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0e77bb52542d..de1e29fa467e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1259,7 +1259,10 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n) return 0; } -static int grow_stack_state(struct bpf_func_state *state, int size) +/* Possibly update state->allocated_stack to be at least size bytes. Also + * possibly update the function's high-water mark in its bpf_subprog_info. + */ +static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size) { size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; @@ -1271,6 +1274,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size) return -ENOMEM; state->allocated_stack = size; + + /* update known max for given subprogram */ + if (env->subprog_info[state->subprogno].stack_depth < size) + env->subprog_info[state->subprogno].stack_depth = size; + return 0; } @@ -4440,9 +4448,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg = NULL; int insn_flags = insn_stack_access_flags(state->frameno, spi); - err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); - if (err) - return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -4595,10 +4600,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) writing_zero = true; - err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); - if (err) - return err; - for (i = min_off; i < max_off; i++) { int spi; @@ -5774,20 +5775,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } -static int update_stack_depth(struct bpf_verifier_env *env, - const struct bpf_func_state *func, - int off) -{ - u16 stack = env->subprog_info[func->subprogno].stack_depth; - - if (stack >= -off) - return 0; - - /* update known max for given subprogram */ - env->subprog_info[func->subprogno].stack_depth = -off; - return 0; -} - /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. @@ -6577,13 +6564,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, * The minimum valid offset is -MAX_BPF_STACK for writes, and * -state->allocated_stack for reads. */ -static int check_stack_slot_within_bounds(s64 off, - struct bpf_func_state *state, - enum bpf_access_type t) +static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, + s64 off, + struct bpf_func_state *state, + enum bpf_access_type t) { int min_valid_off; - if (t == BPF_WRITE) + if (t == BPF_WRITE || env->allow_uninit_stack) min_valid_off = -MAX_BPF_STACK; else min_valid_off = -state->allocated_stack; @@ -6632,7 +6620,7 @@ static int check_stack_access_within_bounds( max_off = reg->smax_value + off + access_size; } - err = check_stack_slot_within_bounds(min_off, state, type); + err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ @@ -6647,8 +6635,10 @@ static int check_stack_access_within_bounds( verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n", err_extra, regno, tn_buf, off, access_size); } + return err; } - return err; + + return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE)); } /* check whether memory at (regno + off) is accessible for t = (read | write) @@ -6663,7 +6653,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; - struct bpf_func_state *state; int size, err = 0; size = bpf_size_to_bytes(bpf_size); @@ -6806,11 +6795,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err; - state = func(env, reg); - err = update_stack_depth(env, state, off); - if (err) - return err; - if (t == BPF_READ) err = check_stack_read(env, regno, off, size, value_regno); @@ -7004,7 +6988,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i /* When register 'regno' is used to read the stack (either directly or through * a helper function) make sure that it's within stack boundary and, depending - * on the access type, that all elements of the stack are initialized. + * on the access type and privileges, that all elements of the stack are + * initialized. * * 'off' includes 'regno->off', but not its dynamic part (if any). * @@ -7112,8 +7097,11 @@ static int check_stack_range_initialized( slot = -i - 1; spi = slot / BPF_REG_SIZE; - if (state->allocated_stack <= slot) - goto err; + if (state->allocated_stack <= slot) { + verbose(env, "verifier bug: allocated_stack too small"); + return -EFAULT; + } + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; @@ -7137,7 +7125,6 @@ static int check_stack_range_initialized( goto mark; } -err: if (tnum_is_const(reg->var_off)) { verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off, access_size); @@ -7162,7 +7149,7 @@ mark: * helper may write to the entire memory range. */ } - return update_stack_depth(env, state, min_off); + return 0; } static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index b2181f850d3e..3aca3dc145b5 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void) "call %[bpf_iter_num_next];" "if r0 == 0 goto 2f;" "if r6 != 42 goto 3f;" - "r7 = -32;" + "r7 = -33;" "call %[bpf_get_prandom_u32];" "r6 = r0;" "goto 1b;\n" diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c index e7206304632e..e3e64bc472cd 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func16.c +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10]) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__success int global_func16(struct __sk_buff *skb) { int array[10]; diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c index 359df865a8f3..8d77cc5323d3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c @@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void) SEC("socket") __description("uninitialized stack1") -__failure __msg("invalid indirect read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid indirect read from stack") __naked void uninitialized_stack1(void) { asm volatile (" \ @@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void) SEC("socket") __description("uninitialized stack2") -__failure __msg("invalid read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid read from stack") __naked void uninitialized_stack2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index 74d9cad469d9..9fc3fae5cd83 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -5,9 +5,10 @@ #include #include "bpf_misc.h" -SEC("cgroup/sysctl") +SEC("socket") __description("ARG_PTR_TO_LONG uninitialized") -__failure __msg("invalid indirect read from stack R4 off -16+0 size 8") +__success +__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8") __naked void arg_ptr_to_long_uninitialized(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index efbfc3a4ad6a..f67390224a9c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -5,9 +5,10 @@ #include #include "bpf_misc.h" -SEC("tc") +SEC("socket") __description("raw_stack: no skb_load_bytes") -__failure __msg("invalid read from stack R6 off=-8 size=8") +__success +__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8") __naked void stack_no_skb_load_bytes(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index b7bdd7db3a35..c810f4f6f479 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void) " ::: __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("variable-offset stack read, uninitialized") -__failure __msg("invalid variable-offset read from stack R2") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void variable_offset_stack_read_uninitialized(void) { asm volatile (" \ @@ -83,12 +84,55 @@ __naked void variable_offset_stack_read_uninitialized(void) SEC("socket") __description("variable-offset stack write, priv vs unpriv") -__success __failure_unpriv +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv /* Variable stack access is rejected for unprivileged. */ __msg_unpriv("R2 variable stack access prohibited for !root") __retval(0) __naked void stack_write_priv_vs_unpriv(void) +{ + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 8-byte aligned */ \ + r2 &= 8; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-8 or \ + * fp-16, but we don't know which \ + */ \ + r2 += r10; \ + /* Dereference it for a stack write */ \ + r0 = 0; \ + *(u64*)(r2 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +/* Similar to the previous test, but this time also perform a read from the + * address written to with a variable offset. The read is allowed, showing that, + * after a variable-offset write, a priviledged program can read the slots that + * were in the range of that write (even if the verifier doesn't actually know if + * the slot being read was really written to or not. + * + * Despite this test being mostly a superset, the previous test is also kept for + * the sake of it checking the stack depth in the case where there is no read. + */ +SEC("socket") +__description("variable-offset stack write followed by read") +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_write_followed_by_read(void) { asm volatile (" \ /* Get an unknown value */ \ @@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void) /* Dereference it for a stack write */ \ r0 = 0; \ *(u64*)(r2 + 0) = r0; \ - /* Now read from the address we just wrote. This shows\ - * that, after a variable-offset write, a priviledged\ - * program can read the slots that were in the range of\ - * that write (even if the verifier doesn't actually know\ - * if the slot being read was really written to or not.\ - */ \ + /* Now read from the address we just wrote. */ \ r3 = *(u64*)(r2 + 0); \ r0 = 0; \ exit; \ @@ -282,9 +321,10 @@ __naked void access_min_out_of_bound(void) : __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("indirect variable-offset stack access, min_off < min_initialized") -__failure __msg("invalid indirect read from stack R2 var_off") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void access_min_off_min_initialized(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 319337bdcfc8..9a7b1106fda8 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -83,17 +83,6 @@ .result = REJECT, .errstr = "!read_ok", }, -{ - "Can't use cmpxchg on uninit memory", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack", -}, { "BPF_W cmpxchg should zero top 32 bits", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 3d5cd51071f0..ab25a81fd3a1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1505,7 +1505,9 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack R7 off=-16 size=8", + .errstr = "R0 invalid mem access 'scalar'", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", -- cgit v1.2.3 From 1720c42b90c8f14ffcb2f2f39a1abafc82a5b22e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Dec 2023 15:30:28 -0800 Subject: selftests/bpf: fix timer/test_bad_ret subtest on test_progs-cpuv4 flavor Because test_bad_ret main program is not written in assembly, we don't control instruction indices in timer_cb_ret_bad() subprog. This bites us in timer/test_bad_ret subtest, where we see difference between cpuv4 and other flavors. For now, make __msg() expectations not rely on instruction indices by anchoring them around bpf_get_prandom_u32 call. Once we have regex/glob support for __msg(), this can be expressed a bit more nicely, but for now just mitigating the problem with available means. Fixes: e02dea158dda ("selftests/bpf: validate async callback return value check correctness") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231208233028.3412690-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/timer_failure.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 9fbc69c77bbb..0996c2486f05 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -47,9 +47,10 @@ __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) __failure /* check that fallthrough code path marks r0 as precise */ -__msg("mark_precise: frame0: regs=r0 stack= before 22: (b7) r0 = 0") +__msg("mark_precise: frame0: regs=r0 stack= before") +__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */ /* check that branch code path marks r0 as precise */ -__msg("mark_precise: frame0: regs=r0 stack= before 24: (85) call bpf_get_prandom_u32#7") +__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7") __msg("should have been in [0, 0]") long BPF_PROG2(test_bad_ret, int, a) { -- cgit v1.2.3 From f4199271dae12ae407fa739e7012914ea6b3f37b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 6 Dec 2023 11:53:25 +0000 Subject: selftests/bpf: Add a new cgroup helper open_classid() This new helper allows us to obtain the fd of a net_cls cgroup, which will be utilized in the subsequent patch. Signed-off-by: Yafang Shao Acked-by: Tejun Heo Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206115326.4295-3-laoar.shao@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/cgroup_helpers.c | 16 ++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5aa133bf3688..19be9c63d5e8 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -689,3 +689,19 @@ int get_cgroup1_hierarchy_id(const char *subsys_name) fclose(file); return found ? id : -1; } + +/** + * open_classid() - Open a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * open it here. + * + * On success, it returns the file descriptor. On failure it returns -1. + */ +int open_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return open(cgroup_workdir, O_RDONLY); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index ee053641c026..502845160d88 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -33,6 +33,7 @@ void cleanup_cgroup_environment(void); int set_classid(void); int join_classid(void); unsigned long long get_classid_cgroup_id(void); +int open_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -- cgit v1.2.3 From a2c6380b17b6339bfedc98d253b6d85e7014953b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 6 Dec 2023 11:53:26 +0000 Subject: selftests/bpf: Add selftests for cgroup1 local storage Expanding the test coverage from cgroup2 to include cgroup1. The result as follows, Already existing test cases for cgroup2: #48/1 cgrp_local_storage/tp_btf:OK #48/2 cgrp_local_storage/attach_cgroup:OK #48/3 cgrp_local_storage/recursion:OK #48/4 cgrp_local_storage/negative:OK #48/5 cgrp_local_storage/cgroup_iter_sleepable:OK #48/6 cgrp_local_storage/yes_rcu_lock:OK #48/7 cgrp_local_storage/no_rcu_lock:OK Expanded test cases for cgroup1: #48/8 cgrp_local_storage/cgrp1_tp_btf:OK #48/9 cgrp_local_storage/cgrp1_recursion:OK #48/10 cgrp_local_storage/cgrp1_negative:OK #48/11 cgrp_local_storage/cgrp1_iter_sleepable:OK #48/12 cgrp_local_storage/cgrp1_yes_rcu_lock:OK #48/13 cgrp_local_storage/cgrp1_no_rcu_lock:OK Summary: #48 cgrp_local_storage:OK Summary: 1/13 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yafang Shao Acked-by: Tejun Heo Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206115326.4295-4-laoar.shao@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/cgrp_local_storage.c | 98 +++++++++++++++++++++- .../selftests/bpf/progs/cgrp_ls_recursion.c | 84 +++++++++++++++---- .../selftests/bpf/progs/cgrp_ls_sleepable.c | 61 ++++++++++++-- tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c | 82 +++++++++++++----- 4 files changed, 278 insertions(+), 47 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 63e776f4176e..747761572098 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -19,6 +19,21 @@ struct socket_cookie { __u64 cookie_value; }; +static bool is_cgroup1; +static int target_hid; + +#define CGROUP_MODE_SET(skel) \ +{ \ + skel->bss->is_cgroup1 = is_cgroup1; \ + skel->bss->target_hid = target_hid; \ +} + +static void cgroup_mode_value_init(bool cgroup, int hid) +{ + is_cgroup1 = cgroup; + target_hid = hid; +} + static void test_tp_btf(int cgroup_fd) { struct cgrp_ls_tp_btf *skel; @@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + /* populate a value in map_b */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); if (!ASSERT_OK(err, "map_update_elem")) @@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + err = cgrp_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; @@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.cgroup_iter, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); skel->bss->target_pid = syscall(SYS_gettid); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); @@ -229,6 +251,8 @@ static void test_no_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -236,7 +260,25 @@ static void test_no_rcu_lock(void) cgrp_ls_sleepable__destroy(skel); } -void test_cgrp_local_storage(void) +static void test_cgrp1_no_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + CGROUP_MODE_SET(skel); + + bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_OK(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +static void cgrp2_local_storage(void) { __u64 cgroup_id; int cgroup_fd; @@ -245,6 +287,8 @@ void test_cgrp_local_storage(void) if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) return; + cgroup_mode_value_init(0, -1); + cgroup_id = get_cgroup_id("/cgrp_local_storage"); if (test__start_subtest("tp_btf")) test_tp_btf(cgroup_fd); @@ -263,3 +307,55 @@ void test_cgrp_local_storage(void) close(cgroup_fd); } + +static void cgrp1_local_storage(void) +{ + int cgrp1_fd, cgrp1_hid, cgrp1_id, err; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + return; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + cgrp1_fd = open_classid(); + if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd")) + goto cleanup; + + cgrp1_id = get_classid_cgroup_id(); + if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id")) + goto close_fd; + + cgrp1_hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid")) + goto close_fd; + + cgroup_mode_value_init(1, cgrp1_hid); + + if (test__start_subtest("cgrp1_tp_btf")) + test_tp_btf(cgrp1_fd); + if (test__start_subtest("cgrp1_recursion")) + test_recursion(cgrp1_fd); + if (test__start_subtest("cgrp1_negative")) + test_negative(); + if (test__start_subtest("cgrp1_iter_sleepable")) + test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id); + if (test__start_subtest("cgrp1_yes_rcu_lock")) + test_yes_rcu_lock(cgrp1_id); + if (test__start_subtest("cgrp1_no_rcu_lock")) + test_cgrp1_no_rcu_lock(); + +close_fd: + close(cgrp1_fd); +cleanup: + cleanup_classid_environment(); +} + +void test_cgrp_local_storage(void) +{ + cgrp2_local_storage(); + cgrp1_local_storage(); +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index a043d8fefdac..610c2427fd93 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -21,50 +21,100 @@ struct { __type(value, long); } map_b SEC(".maps"); +int target_hid = 0; +bool is_cgroup1 = 0; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_lookup(struct cgroup *cgrp) +{ + bpf_cgrp_storage_delete(&map_a, cgrp); + bpf_cgrp_storage_delete(&map_b, cgrp); +} + SEC("fentry/bpf_local_storage_lookup") int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; - bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); - bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + __on_lookup(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_lookup(task->cgroups->dfl_cgrp); return 0; } -SEC("fentry/bpf_local_storage_update") -int BPF_PROG(on_update) +static void __on_update(struct cgroup *cgrp) { - struct task_struct *task = bpf_get_current_task_btf(); long *ptr; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; +} +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_update(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_update(task->cgroups->dfl_cgrp); return 0; } -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct pt_regs *regs, long id) +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; - task = bpf_get_current_task_btf(); - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 200; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 100; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 4c7844e1dbfa..facedd8b8250 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -17,7 +17,11 @@ struct { __u32 target_pid; __u64 cgroup_id; +int target_hid; +bool is_cgroup1; +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx) return 0; } +static void __no_rcu_lock(struct cgroup *cgrp) +{ + long *ptr; + + /* Note that trace rcu is held in sleepable prog, so we can use + * bpf_cgrp_storage_get() in sleepable prog. + */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int no_rcu_lock(void *ctx) +int cgrp1_no_rcu_lock(void *ctx) { struct task_struct *task; struct cgroup *cgrp; - long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* bpf_task_get_cgroup1 can work in sleepable prog */ + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __no_rcu_lock(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ - cgrp = task->cgroups->dfl_cgrp; - ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) - cgroup_id = cgrp->kn->id; + __no_rcu_lock(task->cgroups->dfl_cgrp); return 0; } @@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; + if (is_cgroup1) { + bpf_rcu_read_lock(); + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_cgroup_release(cgrp); + bpf_rcu_read_unlock(); + return 0; + } + bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; /* cgrp is trusted under RCU CS */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c index 9ebb8e2fe541..1c348f000f38 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -27,62 +27,100 @@ pid_t target_pid = 0; int mismatch_cnt = 0; int enter_cnt = 0; int exit_cnt = 0; +int target_hid = 0; +bool is_cgroup1 = 0; -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct pt_regs *regs, long id) +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; int err; - task = bpf_get_current_task_btf(); - if (task->pid != target_pid) - return 0; - /* populate value 0 */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; /* delete value 0 */ - err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + err = bpf_cgrp_storage_delete(&map_a, cgrp); if (err) - return 0; + return; /* value is not available */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0); if (ptr) - return 0; + return; /* re-populate the value */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&enter_cnt, 1); *ptr = MAGIC_VALUE + enter_cnt; - - return 0; } -SEC("tp_btf/sys_exit") -int BPF_PROG(on_exit, struct pt_regs *regs, long id) +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) { struct task_struct *task; - long *ptr; + struct cgroup *cgrp; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); + return 0; +} + +static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp) +{ + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&exit_cnt, 1); if (*ptr != MAGIC_VALUE + exit_cnt) __sync_fetch_and_add(&mismatch_cnt, 1); +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_exit(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_exit(regs, id, task->cgroups->dfl_cgrp); return 0; } -- cgit v1.2.3 From 32fa058398624166dd04ff4af49cfef69c94abbc Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 8 Dec 2023 21:51:00 +0000 Subject: libbpf: Add pr_warn() for EINVAL cases in linker_sanity_check_elf Before the change on `i686-linux` `systemd` build failed as: $ bpftool gen object src/core/bpf/socket_bind/socket-bind.bpf.o src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o Error: failed to link 'src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o': Invalid argument (22) After the change it fails as: $ bpftool gen object src/core/bpf/socket_bind/socket-bind.bpf.o src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o libbpf: ELF section #9 has inconsistent alignment addr=8 != d=4 in src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o Error: failed to link 'src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o': Invalid argument (22) Now it's slightly easier to figure out what is wrong with an ELF file. Signed-off-by: Sergei Trofimovich Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20231208215100.435876-1-slyich@gmail.com --- tools/lib/bpf/linker.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 5ced96d99f8c..52a2901e8bd0 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -719,13 +719,25 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } - if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { + pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + obj->filename); return -EINVAL; - if (sec->shdr->sh_addralign != sec->data->d_align) + } + if (sec->shdr->sh_addralign != sec->data->d_align) { + pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + (long long unsigned)sec->data->d_align, obj->filename); return -EINVAL; + } - if (sec->shdr->sh_size != sec->data->d_size) + if (sec->shdr->sh_size != sec->data->d_size) { + pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + (long long unsigned)sec->data->d_size, obj->filename); return -EINVAL; + } switch (sec->shdr->sh_type) { case SHT_SYMTAB: @@ -737,8 +749,12 @@ static int linker_sanity_check_elf(struct src_obj *obj) break; case SHT_PROGBITS: if (sec->shdr->sh_flags & SHF_EXECINSTR) { - if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) + if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) { + pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + obj->filename); return -EINVAL; + } } break; case SHT_NOBITS: -- cgit v1.2.3 From 7d8ed51bcb32716a40d71043fcd01c4118858c51 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Dec 2023 17:09:58 -0800 Subject: selftests/bpf: validate fake register spill/fill precision backtracking logic Add two tests validating that verifier's precision backtracking logic handles BPF_ST_MEM instructions that produce fake register spill into register slot. This is happening when non-zero constant is written directly to a slot, e.g., *(u64 *)(r10 -8) = 123. Add both full 64-bit register spill, as well as 32-bit "sub-spill". Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231209010958.66758-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index df4920da3472..508f5d6c7347 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -577,4 +577,158 @@ __naked void partial_stack_load_preserves_zeros(void) : __clobber_common); } +char two_byte_buf[2] SEC(".data.two_byte_buf"); + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is IMPRECISE fake register spill */ +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +/* and fp-16 is spilled IMPRECISE const reg */ +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +/* now both fp-8 and fp-16 are precise, very good */ +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 register */ + "r0 = 1;" + "*(u64 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U64 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U64 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1)) + : __clobber_common); +} + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is 32-bit FAKE subregister spill */ +__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision_subreg(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* SUB-register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 SUB-register */ + "r0 = 1;" + "*(u32 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U32 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */ + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 88f6047191e69bdd02cf1b9b5b514f7e514e8b86 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 7 Dec 2023 15:08:43 -0600 Subject: selftests/bpf: Add test for bpf_cpumask_weight() kfunc The new bpf_cpumask_weight() kfunc can be used to count the number of bits that are set in a struct cpumask* kptr. Let's add a selftest to verify its behavior. Signed-off-by: David Vernet Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231207210843.168466-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/cpumask.c | 1 + tools/testing/selftests/bpf/progs/cpumask_common.h | 1 + .../testing/selftests/bpf/progs/cpumask_success.c | 43 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 756ea8b590b6..c2e886399e3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_cpumask_weight", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b15c588ace15..0cd4aebb97cf 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -54,6 +54,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 674a63424dee..fc3666edf456 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -460,6 +460,49 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) return 0; } +SEC("tp_btf/task_newtask") +int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 3; + goto out; + } + + bpf_cpumask_set_cpu(0, local); + if (bpf_cpumask_weight(cast(local)) != 1) { + err = 4; + goto out; + } + + /* + * Make sure that adding additional CPUs changes the weight. Test to + * see whether the CPU was set to account for running on UP machines. + */ + bpf_cpumask_set_cpu(1, local); + if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) { + err = 5; + goto out; + } + + bpf_cpumask_clear(local); + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 6; + goto out; + } +out: + bpf_cpumask_release(local); + return 0; +} + SEC("tp_btf/task_newtask") __success int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) -- cgit v1.2.3 From b2472efe4316b2687c153919c1513a098bd82c17 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:37 +0800 Subject: maple_tree: introduce {mtree,mas}_lock_nested() In some cases, nested locks may be needed, so {mtree,mas}_lock_nested is introduced. For example, when duplicating maple tree, we need to hold the locks of two trees, in which case nested locks are needed. At the same time, add the definition of spin_lock_nested() in tools for testing. Link: https://lkml.kernel.org/r/20231027033845.90608-3-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 4 ++++ tools/include/linux/spinlock.h | 1 + 2 files changed, 5 insertions(+) (limited to 'tools') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index d01e850b570f..f91dbc7fe091 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -256,6 +256,8 @@ struct maple_tree { struct maple_tree name = MTREE_INIT(name, 0) #define mtree_lock(mt) spin_lock((&(mt)->ma_lock)) +#define mtree_lock_nested(mas, subclass) \ + spin_lock_nested((&(mt)->ma_lock), subclass) #define mtree_unlock(mt) spin_unlock((&(mt)->ma_lock)) /* @@ -406,6 +408,8 @@ struct ma_wr_state { }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) +#define mas_lock_nested(mas, subclass) \ + spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 622266b197d0..a6cdf25b6b9d 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -11,6 +11,7 @@ #define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock(x) pthread_mutex_lock(x) +#define spin_lock_nested(x, subclass) pthread_mutex_lock(x) #define spin_unlock(x) pthread_mutex_unlock(x) #define spin_lock_bh(x) pthread_mutex_lock(x) #define spin_unlock_bh(x) pthread_mutex_unlock(x) -- cgit v1.2.3 From 46c99e26f2f86260fed226cab217d0b3ca8dca56 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:39 +0800 Subject: radix tree test suite: align kmem_cache_alloc_bulk() with kernel behavior. When kmem_cache_alloc_bulk() fails to allocate, leave the freed pointers in the array. This enables a more accurate simulation of the kernel's behavior and allows for testing potential double-free scenarios. Link: https://lkml.kernel.org/r/20231027033845.90608-5-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/radix-tree/linux.c | 45 +++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c index 61fe2601cb3a..4eb442206d01 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/radix-tree/linux.c @@ -93,13 +93,9 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, return p; } -void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +void __kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) { assert(objp); - uatomic_dec(&nr_allocated); - uatomic_dec(&cachep->nr_allocated); - if (kmalloc_verbose) - printf("Freeing %p to slab\n", objp); if (cachep->nr_objs > 10 || cachep->align) { memset(objp, POISON_FREE, cachep->size); free(objp); @@ -111,6 +107,15 @@ void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) } } +void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp) +{ + uatomic_dec(&nr_allocated); + uatomic_dec(&cachep->nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to slab\n", objp); + __kmem_cache_free_locked(cachep, objp); +} + void kmem_cache_free(struct kmem_cache *cachep, void *objp) { pthread_mutex_lock(&cachep->lock); @@ -141,18 +146,17 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, if (kmalloc_verbose) pr_debug("Bulk alloc %lu\n", size); - if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (cachep->non_kernel < size) - return 0; - - cachep->non_kernel -= size; - } - pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { struct radix_tree_node *node; for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + node = cachep->objs; cachep->nr_objs--; cachep->objs = node->parent; @@ -163,11 +167,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } else { pthread_mutex_unlock(&cachep->lock); for (i = 0; i < size; i++) { + if (!(gfp & __GFP_DIRECT_RECLAIM)) { + if (!cachep->non_kernel) + break; + cachep->non_kernel--; + } + if (cachep->align) { posix_memalign(&p[i], cachep->align, cachep->size); } else { p[i] = malloc(cachep->size); + if (!p[i]) + break; } if (cachep->ctor) cachep->ctor(p[i]); @@ -176,6 +188,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } } + if (i < size) { + size = i; + pthread_mutex_lock(&cachep->lock); + for (i = 0; i < size; i++) + __kmem_cache_free_locked(cachep, p[i]); + pthread_mutex_unlock(&cachep->lock); + return 0; + } + for (i = 0; i < size; i++) { uatomic_inc(&nr_allocated); uatomic_inc(&cachep->nr_allocated); -- cgit v1.2.3 From a2587a7e8d37885dc063255f5400a66299b42e48 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:40 +0800 Subject: maple_tree: add test for mtree_dup() Add test for mtree_dup(). Test by duplicating different maple trees and then comparing the two trees. Includes tests for duplicating full trees and memory allocation failures on different nodes. Link: https://lkml.kernel.org/r/20231027033845.90608-6-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 361 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index e5da1cad70ba..12b3390e9591 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35857,6 +35857,363 @@ static noinline void __init check_locky(struct maple_tree *mt) mt_clear_in_rcu(mt); } +/* + * Compares two nodes except for the addresses stored in the nodes. + * Returns zero if they are the same, otherwise returns non-zero. + */ +static int __init compare_node(struct maple_enode *enode_a, + struct maple_enode *enode_b) +{ + struct maple_node *node_a, *node_b; + struct maple_node a, b; + void **slots_a, **slots_b; /* Do not use the rcu tag. */ + enum maple_type type; + int i; + + if (((unsigned long)enode_a & MAPLE_NODE_MASK) != + ((unsigned long)enode_b & MAPLE_NODE_MASK)) { + pr_err("The lower 8 bits of enode are different.\n"); + return -1; + } + + type = mte_node_type(enode_a); + node_a = mte_to_node(enode_a); + node_b = mte_to_node(enode_b); + a = *node_a; + b = *node_b; + + /* Do not compare addresses. */ + if (ma_is_root(node_a) || ma_is_root(node_b)) { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MA_ROOT_PARENT); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MA_ROOT_PARENT); + } else { + a.parent = (struct maple_pnode *)((unsigned long)a.parent & + MAPLE_NODE_MASK); + b.parent = (struct maple_pnode *)((unsigned long)b.parent & + MAPLE_NODE_MASK); + } + + if (a.parent != b.parent) { + pr_err("The lower 8 bits of parents are different. %p %p\n", + a.parent, b.parent); + return -1; + } + + /* + * If it is a leaf node, the slots do not contain the node address, and + * no special processing of slots is required. + */ + if (ma_is_leaf(type)) + goto cmp; + + slots_a = ma_slots(&a, type); + slots_b = ma_slots(&b, type); + + for (i = 0; i < mt_slots[type]; i++) { + if (!slots_a[i] && !slots_b[i]) + break; + + if (!slots_a[i] || !slots_b[i]) { + pr_err("The number of slots is different.\n"); + return -1; + } + + /* Do not compare addresses in slots. */ + ((unsigned long *)slots_a)[i] &= MAPLE_NODE_MASK; + ((unsigned long *)slots_b)[i] &= MAPLE_NODE_MASK; + } + +cmp: + /* + * Compare all contents of two nodes, including parent (except address), + * slots (except address), pivots, gaps and metadata. + */ + return memcmp(&a, &b, sizeof(struct maple_node)); +} + +/* + * Compare two trees and return 0 if they are the same, non-zero otherwise. + */ +static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) +{ + MA_STATE(mas_a, mt_a, 0, 0); + MA_STATE(mas_b, mt_b, 0, 0); + + if (mt_a->ma_flags != mt_b->ma_flags) { + pr_err("The flags of the two trees are different.\n"); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + + if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { + if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { + pr_err("One is MAS_ROOT and the other is not.\n"); + return -1; + } + return 0; + } + + while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { + + if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { + pr_err("One is MAS_NONE and the other is not.\n"); + return -1; + } + + if (mas_a.min != mas_b.min || + mas_a.max != mas_b.max) { + pr_err("mas->min, mas->max do not match.\n"); + return -1; + } + + if (compare_node(mas_a.node, mas_b.node)) { + pr_err("The contents of nodes %p and %p are different.\n", + mas_a.node, mas_b.node); + mt_dump(mt_a, mt_dump_dec); + mt_dump(mt_b, mt_dump_dec); + return -1; + } + + mas_dfs_preorder(&mas_a); + mas_dfs_preorder(&mas_b); + } + + return 0; +} + +static __init void mas_subtree_max_range(struct ma_state *mas) +{ + unsigned long limit = mas->max; + MA_STATE(newmas, mas->tree, 0, 0); + void *entry; + + mas_for_each(mas, entry, limit) { + if (mas->last - mas->index >= + newmas.last - newmas.index) { + newmas = *mas; + } + } + + *mas = newmas; +} + +/* + * build_full_tree() - Build a full tree. + * @mt: The tree to build. + * @flags: Use @flags to build the tree. + * @height: The height of the tree to build. + * + * Build a tree with full leaf nodes and internal nodes. Note that the height + * should not exceed 3, otherwise it will take a long time to build. + * Return: zero if the build is successful, non-zero if it fails. + */ +static __init int build_full_tree(struct maple_tree *mt, unsigned int flags, + int height) +{ + MA_STATE(mas, mt, 0, 0); + unsigned long step; + int ret = 0, cnt = 1; + enum maple_type type; + + mt_init_flags(mt, flags); + mtree_insert_range(mt, 0, ULONG_MAX, xa_mk_value(5), GFP_KERNEL); + + mtree_lock(mt); + + while (1) { + mas_set(&mas, 0); + if (mt_height(mt) < height) { + mas.max = ULONG_MAX; + goto store; + } + + while (1) { + mas_dfs_preorder(&mas); + if (mas_is_none(&mas)) + goto unlock; + + type = mte_node_type(mas.node); + if (mas_data_end(&mas) + 1 < mt_slots[type]) { + mas_set(&mas, mas.min); + goto store; + } + } +store: + mas_subtree_max_range(&mas); + step = mas.last - mas.index; + if (step < 1) { + ret = -1; + goto unlock; + } + + step /= 2; + mas.last = mas.index + step; + mas_store_gfp(&mas, xa_mk_value(5), + GFP_KERNEL); + ++cnt; + } +unlock: + mtree_unlock(mt); + + MT_BUG_ON(mt, mt_height(mt) != height); + /* pr_info("height:%u number of elements:%d\n", mt_height(mt), cnt); */ + return ret; +} + +static noinline void __init check_mtree_dup(struct maple_tree *mt) +{ + DEFINE_MTREE(new); + int i, j, ret, count = 0; + unsigned int rand_seed = 17, rand; + + /* store a value at [0, 0] */ + mt_init_flags(mt, 0); + mtree_store_range(mt, 0, 0, xa_mk_value(0), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + + /* The two trees have different attributes. */ + mt_init_flags(mt, 0); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* The new tree is not empty */ + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + mtree_store(&new, 5, xa_mk_value(5), GFP_KERNEL); + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret != -EINVAL); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Test for duplicating full trees. */ + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, 0, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, 0); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + for (i = 1; i <= 3; i++) { + ret = build_full_tree(mt, MT_FLAGS_ALLOC_RANGE, i); + MT_BUG_ON(mt, ret); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test for normal duplicating. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + ret = mtree_dup(mt, &new, GFP_KERNEL); + MT_BUG_ON(&new, ret); + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* Test memory allocation failed. */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + for (i = 0; i < 30; i += 3) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + + /* Failed at the first node. */ + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + mt_set_non_kernel(0); + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + MT_BUG_ON(&new, ret != -ENOMEM); + mtree_destroy(mt); + mtree_destroy(&new); + + /* Random maple tree fails at a random node. */ + for (i = 0; i < 1000; i += 3) { + if (i & 1) { + mt_init_flags(mt, 0); + mt_init_flags(&new, 0); + } else { + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + mt_init_flags(&new, MT_FLAGS_ALLOC_RANGE); + } + + for (j = 0; j < i; j++) { + mtree_store_range(mt, j * 10, j * 10 + 5, + xa_mk_value(j), GFP_KERNEL); + } + /* + * The rand() library function is not used, so we can generate + * the same random numbers on any platform. + */ + rand_seed = rand_seed * 1103515245 + 12345; + rand = rand_seed / 65536 % 128; + mt_set_non_kernel(rand); + + ret = mtree_dup(mt, &new, GFP_NOWAIT); + mt_set_non_kernel(0); + if (ret != 0) { + MT_BUG_ON(&new, ret != -ENOMEM); + count++; + mtree_destroy(mt); + continue; + } + + mt_validate(&new); + if (compare_tree(mt, &new)) + MT_BUG_ON(&new, 1); + + mtree_destroy(mt); + mtree_destroy(&new); + } + + /* pr_info("mtree_dup() fail %d times\n", count); */ + BUG_ON(!count); +} + extern void test_kmem_cache_bulk(void); void farmer_tests(void) @@ -35904,6 +36261,10 @@ void farmer_tests(void) check_null_expand(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, 0); + check_mtree_dup(&tree); + mtree_destroy(&tree); + /* RCU testing */ mt_init_flags(&tree, 0); check_erase_testset(&tree); -- cgit v1.2.3 From f670fa1caadb4ea532a89012c5451e4c6789bfcc Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:42 +0800 Subject: maple_tree: skip other tests when BENCH is enabled Skip other tests when BENCH is enabled so that performance can be measured in user space. Link: https://lkml.kernel.org/r/20231027033845.90608-8-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 8 ++++---- tools/testing/radix-tree/maple.c | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 464eeb90d5ad..de470950714f 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -3585,10 +3585,6 @@ static int __init maple_tree_seed(void) pr_info("\nTEST STARTING\n\n"); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_root_expand(&tree); - mtree_destroy(&tree); - #if defined(BENCH_SLOT_STORE) #define BENCH mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); @@ -3646,6 +3642,10 @@ static int __init maple_tree_seed(void) goto skip; #endif + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); + check_root_expand(&tree); + mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_iteration(&tree); mtree_destroy(&tree); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 12b3390e9591..cb5358674521 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -36299,7 +36299,9 @@ void farmer_tests(void) void maple_tree_tests(void) { +#if !defined(BENCH) farmer_tests(); +#endif maple_tree_seed(); maple_tree_harvest(); } -- cgit v1.2.3 From 446e1867e6df3cbdd19af6be8f8f4ed56176adb4 Mon Sep 17 00:00:00 2001 From: Peng Zhang Date: Fri, 27 Oct 2023 11:38:43 +0800 Subject: maple_tree: update check_forking() and bench_forking() Updated check_forking() and bench_forking() to use __mt_dup() to duplicate maple tree. Link: https://lkml.kernel.org/r/20231027033845.90608-9-zhangpeng.00@bytedance.com Signed-off-by: Peng Zhang Reviewed-by: Liam R. Howlett Cc: Christian Brauner Cc: Jonathan Corbet Cc: Mateusz Guzik Cc: Mathieu Desnoyers Cc: Matthew Wilcox Cc: Michael S. Tsirkin Cc: Mike Christie Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 117 ++++++++++++++++++++++---------------------- tools/include/linux/rwsem.h | 4 ++ 2 files changed, 62 insertions(+), 59 deletions(-) (limited to 'tools') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index de470950714f..3e4597fb49d3 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -1834,47 +1834,48 @@ static noinline void __init bench_mas_prev(struct maple_tree *mt) } #endif /* check_forking - simulate the kernel forking sequence with the tree. */ -static noinline void __init check_forking(struct maple_tree *mt) +static noinline void __init check_forking(void) { - - struct maple_tree newmt; - int i, nr_entries = 134; + struct maple_tree mt, newmt; + int i, nr_entries = 134, ret; void *val; - MA_STATE(mas, mt, 0, 0); - MA_STATE(newmas, mt, 0, 0); - struct rw_semaphore newmt_lock; + MA_STATE(mas, &mt, 0, 0); + MA_STATE(newmas, &newmt, 0, 0); + struct rw_semaphore mt_lock, newmt_lock; + init_rwsem(&mt_lock); init_rwsem(&newmt_lock); - for (i = 0; i <= nr_entries; i++) - mtree_store_range(mt, i*10, i*10 + 5, - xa_mk_value(i), GFP_KERNEL); + mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&mt, &mt_lock); - mt_set_non_kernel(99999); mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); mt_set_external_lock(&newmt, &newmt_lock); - newmas.tree = &newmt; - mas_reset(&newmas); - mas_reset(&mas); - down_write(&newmt_lock); - mas.index = 0; - mas.last = 0; - if (mas_expected_entries(&newmas, nr_entries)) { + + down_write(&mt_lock); + for (i = 0; i <= nr_entries; i++) { + mas_set_range(&mas, i*10, i*10 + 5); + mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + } + + down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING); + ret = __mt_dup(&mt, &newmt, GFP_KERNEL); + if (ret) { pr_err("OOM!"); BUG_ON(1); } - rcu_read_lock(); - mas_for_each(&mas, val, ULONG_MAX) { - newmas.index = mas.index; - newmas.last = mas.last; + + mas_set(&newmas, 0); + mas_for_each(&newmas, val, ULONG_MAX) mas_store(&newmas, val); - } - rcu_read_unlock(); + mas_destroy(&newmas); + mas_destroy(&mas); mt_validate(&newmt); - mt_set_non_kernel(0); __mt_destroy(&newmt); + __mt_destroy(&mt); up_write(&newmt_lock); + up_write(&mt_lock); } static noinline void __init check_iteration(struct maple_tree *mt) @@ -1977,49 +1978,51 @@ static noinline void __init check_mas_store_gfp(struct maple_tree *mt) } #if defined(BENCH_FORK) -static noinline void __init bench_forking(struct maple_tree *mt) +static noinline void __init bench_forking(void) { - - struct maple_tree newmt; - int i, nr_entries = 134, nr_fork = 80000; + struct maple_tree mt, newmt; + int i, nr_entries = 134, nr_fork = 80000, ret; void *val; - MA_STATE(mas, mt, 0, 0); - MA_STATE(newmas, mt, 0, 0); - struct rw_semaphore newmt_lock; + MA_STATE(mas, &mt, 0, 0); + MA_STATE(newmas, &newmt, 0, 0); + struct rw_semaphore mt_lock, newmt_lock; + init_rwsem(&mt_lock); init_rwsem(&newmt_lock); - mt_set_external_lock(&newmt, &newmt_lock); - for (i = 0; i <= nr_entries; i++) - mtree_store_range(mt, i*10, i*10 + 5, - xa_mk_value(i), GFP_KERNEL); + mt_init_flags(&mt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&mt, &mt_lock); + + down_write(&mt_lock); + for (i = 0; i <= nr_entries; i++) { + mas_set_range(&mas, i*10, i*10 + 5); + mas_store_gfp(&mas, xa_mk_value(i), GFP_KERNEL); + } for (i = 0; i < nr_fork; i++) { - mt_set_non_kernel(99999); - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE); - newmas.tree = &newmt; - mas_reset(&newmas); - mas_reset(&mas); - mas.index = 0; - mas.last = 0; - rcu_read_lock(); - down_write(&newmt_lock); - if (mas_expected_entries(&newmas, nr_entries)) { - printk("OOM!"); + mt_init_flags(&newmt, + MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&newmt, &newmt_lock); + + down_write_nested(&newmt_lock, SINGLE_DEPTH_NESTING); + ret = __mt_dup(&mt, &newmt, GFP_KERNEL); + if (ret) { + pr_err("OOM!"); BUG_ON(1); } - mas_for_each(&mas, val, ULONG_MAX) { - newmas.index = mas.index; - newmas.last = mas.last; + + mas_set(&newmas, 0); + mas_for_each(&newmas, val, ULONG_MAX) mas_store(&newmas, val); - } + mas_destroy(&newmas); - rcu_read_unlock(); mt_validate(&newmt); - mt_set_non_kernel(0); __mt_destroy(&newmt); up_write(&newmt_lock); } + mas_destroy(&mas); + __mt_destroy(&mt); + up_write(&mt_lock); } #endif @@ -3615,9 +3618,7 @@ static int __init maple_tree_seed(void) #endif #if defined(BENCH_FORK) #define BENCH - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - bench_forking(&tree); - mtree_destroy(&tree); + bench_forking(); goto skip; #endif #if defined(BENCH_MT_FOR_EACH) @@ -3650,9 +3651,7 @@ static int __init maple_tree_seed(void) check_iteration(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_forking(&tree); - mtree_destroy(&tree); + check_forking(); mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_mas_store_gfp(&tree); diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h index 83971b3cbfce..f8bffd4a987c 100644 --- a/tools/include/linux/rwsem.h +++ b/tools/include/linux/rwsem.h @@ -37,4 +37,8 @@ static inline int up_write(struct rw_semaphore *sem) { return pthread_rwlock_unlock(&sem->lock); } + +#define down_read_nested(sem, subclass) down_read(sem) +#define down_write_nested(sem, subclass) down_write(sem) + #endif /* _TOOLS_RWSEM_H */ -- cgit v1.2.3 From e6a9a2cbc13bf43e4c03f57666e93d511249d5d7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 6 Nov 2023 14:09:58 -0800 Subject: fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PAGEMAP_SCAN ioctl returns information regarding page table entries. It is more efficient compared to reading pagemap files. CRIU can start to utilize this ioctl, but it needs info about soft-dirty bits to track memory changes. We are aware of a new method for tracking memory changes implemented in the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method is its usability by unprivileged users. However, it is not feasible to transparently replace the soft-dirty tracker with the new one. The main problem here is userfault descriptors that have to be preserved between pre-dump iterations. It means criu continues supporting the soft-dirty method to avoid breakage for current users. The new method will be implemented as a separate feature. [avagin@google.com: update tools/include/uapi/linux/fs.h] Link: https://lkml.kernel.org/r/20231107164139.576046-1-avagin@google.com Link: https://lkml.kernel.org/r/20231106220959.296568-1-avagin@google.com Signed-off-by: Andrei Vagin Reviewed-by: Muhammad Usama Anjum Cc: MichaÅ‚ MirosÅ‚aw Signed-off-by: Andrew Morton --- Documentation/admin-guide/mm/pagemap.rst | 1 + fs/proc/task_mmu.c | 17 ++++++++++++++++- include/uapi/linux/fs.h | 1 + tools/include/uapi/linux/fs.h | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst index fe17cf210426..f5f065c67615 100644 --- a/Documentation/admin-guide/mm/pagemap.rst +++ b/Documentation/admin-guide/mm/pagemap.rst @@ -253,6 +253,7 @@ Following flags about pages are currently supported: - ``PAGE_IS_SWAPPED`` - Page is in swapped - ``PAGE_IS_PFNZERO`` - Page has zero PFN - ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed +- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty The ``struct pm_scan_arg`` is used as the argument of the IOCTL. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 435b61054b5b..d19924bf0a39 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1761,7 +1761,7 @@ static int pagemap_release(struct inode *inode, struct file *file) #define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ PAGE_IS_FILE | PAGE_IS_PRESENT | \ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ - PAGE_IS_HUGE) + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY) #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC) struct pagemap_scan_private { @@ -1793,6 +1793,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; + if (pte_soft_dirty(pte)) + categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { swp_entry_t swp; @@ -1806,6 +1808,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, !PageAnon(pfn_swap_entry_to_page(swp))) categories |= PAGE_IS_FILE; } + if (pte_swp_soft_dirty(pte)) + categories |= PAGE_IS_SOFT_DIRTY; } return categories; @@ -1853,12 +1857,16 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, if (is_zero_pfn(pmd_pfn(pmd))) categories |= PAGE_IS_PFNZERO; + if (pmd_soft_dirty(pmd)) + categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pmd(pmd)) { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; + if (pmd_swp_soft_dirty(pmd)) + categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { swp = pmd_to_swp_entry(pmd); @@ -1905,10 +1913,14 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) categories |= PAGE_IS_FILE; if (is_zero_pfn(pte_pfn(pte))) categories |= PAGE_IS_PFNZERO; + if (pte_soft_dirty(pte)) + categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; + if (pte_swp_soft_dirty(pte)) + categories |= PAGE_IS_SOFT_DIRTY; } return categories; @@ -2007,6 +2019,9 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end, if (wp_allowed) vma_category |= PAGE_IS_WPALLOWED; + if (vma->vm_flags & VM_SOFTDIRTY) + vma_category |= PAGE_IS_SOFT_DIRTY; + if (!pagemap_scan_is_interesting_vma(vma_category, p)) return 1; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index da43810b7485..48ad69f7722e 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_SWAPPED (1 << 4) #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) /* * struct page_region - Page region with flags diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index da43810b7485..48ad69f7722e 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_SWAPPED (1 << 4) #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) /* * struct page_region - Page region with flags -- cgit v1.2.3 From 600bca580579d8d8454cc8fe3290e2f8b9c01884 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 6 Nov 2023 14:09:59 -0800 Subject: selftests/mm: check that PAGEMAP_SCAN returns correct categories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, tests read page flags from /proc/pid/pagemap files. With this change, tests will check that PAGEMAP_SCAN return correct information too. [colin.i.king@gmail.com: fix spelling mistake "succedded" -> "succeeded"] Link: https://lkml.kernel.org/r/20231121093104.1728332-1-colin.i.king@gmail.com Link: https://lkml.kernel.org/r/20231106220959.296568-2-avagin@google.com Signed-off-by: Andrei Vagin Signed-off-by: Colin Ian King Reviewed-by: Muhammad Usama Anjum Tested-by: Muhammad Usama Anjum Cc: MichaÅ‚ MirosÅ‚aw [avagin@google.com: allow running tests on old kernels] Link: https://lkml.kernel.org/r/20231117181127.2574897-1-avagin@google.com Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/vm_util.c | 80 ++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 3082b40492dd..05736c615734 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include "../kselftest.h" @@ -28,19 +29,92 @@ uint64_t pagemap_get_entry(int fd, char *start) return entry; } +static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + psize()); + arg.vec = (uintptr_t)r; + arg.vec_len = 1; + arg.flags = 0; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = 0; + arg.category_inverted = 0; + arg.category_mask = 0; + arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; + arg.return_mask = arg.category_anyof_mask; + + return ioctl(fd, PAGEMAP_SCAN, &arg); +} + +static uint64_t pagemap_scan_get_categories(int fd, char *start) +{ + struct page_region r; + long ret; + + ret = __pagemap_scan_get_categories(fd, start, &r); + if (ret < 0) + ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); + if (ret == 0) + return 0; + return r.categories; +} + +/* `start` is any valid address. */ +static bool pagemap_scan_supported(int fd, char *start) +{ + static int supported = -1; + int ret; + + if (supported != -1) + return supported; + + /* Provide an invalid address in order to trigger EFAULT. */ + ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); + if (ret == 0) + ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); + + supported = errno == EFAULT; + + return supported; +} + +static bool page_entry_is(int fd, char *start, char *desc, + uint64_t pagemap_flags, uint64_t pagescan_flags) +{ + bool m = pagemap_get_entry(fd, start) & pagemap_flags; + + if (pagemap_scan_supported(fd, start)) { + bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; + + if (m == s) + return m; + + ksft_exit_fail_msg( + "read and ioctl return unmatched results for %s: %d %d", desc, m, s); + } + return m; +} + bool pagemap_is_softdirty(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; + return page_entry_is(fd, start, "soft-dirty", + PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); } bool pagemap_is_swapped(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SWAP; + return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); } bool pagemap_is_populated(int fd, char *start) { - return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); + return page_entry_is(fd, start, "populated", + PM_PRESENT | PM_SWAP, + PAGE_IS_PRESENT | PAGE_IS_SWAPPED); } unsigned long pagemap_get_pfn(int fd, char *start) -- cgit v1.2.3 From 60433a9d038db006ca2f49e3c5f050dc46aaad3a Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Thu, 23 Nov 2023 10:19:43 +0300 Subject: samples: introduce new samples subdir for cgroup Patch series "samples: introduce cgroup events listeners", v3. To begin with, this patch series relocates the cgroup example code to the samples/cgroup directory, which is the appropriate location for such code snippets. Furthermore, a new memcg events listener is introduced. This listener is a simple yet effective tool for monitoring memory events and managing counter changes during runtime. Additionally, as per Andrew Morton's suggestion, a helpful reminder comment is included in the memcontrol implementation. This comment serves to ensure that the samples code is updated whenever new events are added. This patch (of 3): Move the cgroup_event_listener for cgroup v1 to the samples directory. This suggestion was proposed by Andrew Morton during the discussion [1]. Link: https://lore.kernel.org/all/20231106140934.3f5d4960141562fe8da53906@linux-foundation.org/ [1] Link: https://lkml.kernel.org/r/20231123071945.25811-1-ddrokosov@salutedevices.com Link: https://lkml.kernel.org/r/20231123071945.25811-2-ddrokosov@salutedevices.com Signed-off-by: Dmitry Rokosov Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + samples/Kconfig | 6 +++ samples/Makefile | 1 + samples/cgroup/Makefile | 5 ++ samples/cgroup/cgroup_event_listener.c | 83 ++++++++++++++++++++++++++++++++++ tools/cgroup/Makefile | 11 ----- tools/cgroup/cgroup_event_listener.c | 83 ---------------------------------- 7 files changed, 96 insertions(+), 94 deletions(-) create mode 100644 samples/cgroup/Makefile create mode 100644 samples/cgroup/cgroup_event_listener.c delete mode 100644 tools/cgroup/Makefile delete mode 100644 tools/cgroup/cgroup_event_listener.c (limited to 'tools') diff --git a/MAINTAINERS b/MAINTAINERS index 5c9d3d854671..6f5d6962d26d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5339,6 +5339,7 @@ L: linux-mm@kvack.org S: Maintained F: mm/memcontrol.c F: mm/swap_cgroup.c +F: samples/cgroup/* F: tools/testing/selftests/cgroup/memcg_protection.m F: tools/testing/selftests/cgroup/test_hugetlb_memcg.c F: tools/testing/selftests/cgroup/test_kmem.c diff --git a/samples/Kconfig b/samples/Kconfig index b0ddf5f36738..b288d9991d27 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -285,6 +285,12 @@ config SAMPLE_KMEMLEAK Build a sample program which have explicitly leaks memory to test kmemleak +config SAMPLE_CGROUP + bool "Build cgroup sample code" + depends on CGROUPS && CC_CAN_LINK && HEADERS_INSTALL + help + Build samples that demonstrate the usage of the cgroup API. + source "samples/rust/Kconfig" endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 0a551c2b33f4..b85fa64390c5 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,6 +3,7 @@ subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs +subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ obj-$(CONFIG_SAMPLE_FANOTIFY_ERROR) += fanotify/ diff --git a/samples/cgroup/Makefile b/samples/cgroup/Makefile new file mode 100644 index 000000000000..deef4530f5e7 --- /dev/null +++ b/samples/cgroup/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +userprogs-always-y += cgroup_event_listener + +userccflags += -I usr/include diff --git a/samples/cgroup/cgroup_event_listener.c b/samples/cgroup/cgroup_event_listener.c new file mode 100644 index 000000000000..3d70dc831a76 --- /dev/null +++ b/samples/cgroup/cgroup_event_listener.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cgroup_event_listener.c - Simple listener of cgroup events + * + * Copyright (C) Kirill A. Shutemov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define USAGE_STR "Usage: cgroup_event_listener " + +int main(int argc, char **argv) +{ + int efd = -1; + int cfd = -1; + int event_control = -1; + char event_control_path[PATH_MAX]; + char line[LINE_MAX]; + int ret; + + if (argc != 3) + errx(1, "%s", USAGE_STR); + + cfd = open(argv[1], O_RDONLY); + if (cfd == -1) + err(1, "Cannot open %s", argv[1]); + + ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", + dirname(argv[1])); + if (ret >= PATH_MAX) + errx(1, "Path to cgroup.event_control is too long"); + + event_control = open(event_control_path, O_WRONLY); + if (event_control == -1) + err(1, "Cannot open %s", event_control_path); + + efd = eventfd(0, 0); + if (efd == -1) + err(1, "eventfd() failed"); + + ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); + if (ret >= LINE_MAX) + errx(1, "Arguments string is too long"); + + ret = write(event_control, line, strlen(line) + 1); + if (ret == -1) + err(1, "Cannot write to cgroup.event_control"); + + while (1) { + uint64_t result; + + ret = read(efd, &result, sizeof(result)); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "Cannot read from eventfd"); + } + assert(ret == sizeof(result)); + + ret = access(event_control_path, W_OK); + if ((ret == -1) && (errno == ENOENT)) { + puts("The cgroup seems to have removed."); + break; + } + + if (ret == -1) + err(1, "cgroup.event_control is not accessible any more"); + + printf("%s %s: crossed\n", argv[1], argv[2]); + } + + return 0; +} diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile deleted file mode 100644 index ffca068e4a76..000000000000 --- a/tools/cgroup/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for cgroup tools - -CFLAGS = -Wall -Wextra - -all: cgroup_event_listener -%: %.c - $(CC) $(CFLAGS) -o $@ $^ - -clean: - $(RM) cgroup_event_listener diff --git a/tools/cgroup/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c deleted file mode 100644 index 3d70dc831a76..000000000000 --- a/tools/cgroup/cgroup_event_listener.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * cgroup_event_listener.c - Simple listener of cgroup events - * - * Copyright (C) Kirill A. Shutemov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define USAGE_STR "Usage: cgroup_event_listener " - -int main(int argc, char **argv) -{ - int efd = -1; - int cfd = -1; - int event_control = -1; - char event_control_path[PATH_MAX]; - char line[LINE_MAX]; - int ret; - - if (argc != 3) - errx(1, "%s", USAGE_STR); - - cfd = open(argv[1], O_RDONLY); - if (cfd == -1) - err(1, "Cannot open %s", argv[1]); - - ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", - dirname(argv[1])); - if (ret >= PATH_MAX) - errx(1, "Path to cgroup.event_control is too long"); - - event_control = open(event_control_path, O_WRONLY); - if (event_control == -1) - err(1, "Cannot open %s", event_control_path); - - efd = eventfd(0, 0); - if (efd == -1) - err(1, "eventfd() failed"); - - ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); - if (ret >= LINE_MAX) - errx(1, "Arguments string is too long"); - - ret = write(event_control, line, strlen(line) + 1); - if (ret == -1) - err(1, "Cannot write to cgroup.event_control"); - - while (1) { - uint64_t result; - - ret = read(efd, &result, sizeof(result)); - if (ret == -1) { - if (errno == EINTR) - continue; - err(1, "Cannot read from eventfd"); - } - assert(ret == sizeof(result)); - - ret = access(event_control_path, W_OK); - if ((ret == -1) && (errno == ENOENT)) { - puts("The cgroup seems to have removed."); - break; - } - - if (ret == -1) - err(1, "cgroup.event_control is not accessible any more"); - - printf("%s %s: crossed\n", argv[1], argv[2]); - } - - return 0; -} -- cgit v1.2.3 From 33c1a7785a41216ec44d0fadc1890103e2db88b0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Nov 2023 15:22:21 +0000 Subject: kselftest/arm64: Improve output for skipped TPIDR2 ABI test When TPIDR2 is not supported the tpidr2 ABI test prints the same message for each skipped test: ok 1 skipped, TPIDR2 not supported which isn't ideal for test automation software since it tracks kselftest results based on the string used to describe the test. This is also not standard KTAP output, the expected format is: ok 1 # SKIP default_value Updated the program to generate this, using the same set of test names that we would run if the test actually executed. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231124-kselftest-arm64-tpidr2-skip-v1-1-e05d0ccef101@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/abi/tpidr2.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 351a098b503a..02ee3a91b780 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -254,6 +254,12 @@ static int write_clone_read(void) putnum(++tests_run); \ putstr(" " #name "\n"); +#define skip_test(name) \ + tests_skipped++; \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" # SKIP " #name "\n"); + int main(int argc, char **argv) { int ret, i; @@ -283,13 +289,11 @@ int main(int argc, char **argv) } else { putstr("# SME support not present\n"); - for (i = 0; i < EXPECTED_TESTS; i++) { - putstr("ok "); - putnum(i); - putstr(" skipped, TPIDR2 not supported\n"); - } - - tests_skipped += EXPECTED_TESTS; + skip_test(default_value); + skip_test(write_read); + skip_test(write_sleep_read); + skip_test(write_fork_read); + skip_test(write_clone_read); } print_summary(); -- cgit v1.2.3 From 48f7ab21f731f5a02ec34a4b83ae88c4a41d6a10 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Dec 2023 14:24:44 +0000 Subject: kselftest/arm64: Log SVCR when the SME tests barf On failure we log the actual and expected value of the register we detect a mismatch in. For SME one obvious potential source of corruption would be if we had corrupted SVCR since changes in streaming mode will reset the register values, log the value to aid in understanding issues. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231205-arm64-kselftest-log-svcr-v1-1-b77abd9ee7f3@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-test.S | 10 ++++++++++ tools/testing/selftests/arm64/fp/za-test.S | 6 ++++++ tools/testing/selftests/arm64/fp/zt-test.S | 5 +++++ 3 files changed, 21 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 547d077e3517..fff60e2a25ad 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -515,6 +515,10 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size +#ifdef SSVE + mrs x13, S3_3_C4_C2_2 +#endif + puts "Mismatch: PID=" mov x0, x20 bl putdec @@ -534,6 +538,12 @@ function barf bl dumphex puts "]\n" +#ifdef SSVE + puts "\tSVCR: " + mov x0, x13 + bl putdecn +#endif + mov x8, #__NR_getpid svc #0 // fpsimd.c acitivty log dump hack diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 9dcd70911397..095b45531640 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -333,6 +333,9 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 + smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -356,6 +359,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index d63286397638..b5c81e81a379 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -267,6 +267,8 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -287,6 +289,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 -- cgit v1.2.3 From 15c79c6507c0eab5ec0d4cd402ac52d42735a43e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 6 Dec 2023 21:59:18 +0100 Subject: selftests/bpf: Increase invalid metadata size Changed check expects passed data meta to be deemed invalid. After loosening the requirement, the size of 36 bytes becomes valid. Therefore, increase tested meta size to 256, so we do not get an unexpected success. Signed-off-by: Larysa Zaremba Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231206205919.404415-2-larysa.zaremba@intel.com --- tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index ab4952b9fb1d..e6a783c7f5db 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -77,8 +77,8 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), 0, 0, 0); - /* Meta data must be 32 bytes or smaller */ - test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + /* Meta data must be 255 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); /* Total size of data must match data_end - data_meta */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), -- cgit v1.2.3 From 8596ba324356a7392a6639024de8c9ae7a9fce92 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 29 Nov 2023 14:35:40 -0800 Subject: perf stat: Fix help message for --metric-no-threshold option Copy-paste error led to help message for metric-no-threshold repeating that of metric-no-merge. Fixes: 1fd09e299bdd434b ("perf metric: Add --metric-no-threshold option") Reported-by: Stephane Eranian Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231129223540.2247030-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d22228eddccb..0bfa70791cfc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1255,7 +1255,7 @@ static struct option stat_options[] = { OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge, "don't try to share events between metrics in a group"), OPT_BOOLEAN(0, "metric-no-threshold", &stat_config.metric_no_threshold, - "don't try to share events between metrics in a group "), + "disable adding events for the metric threshold calculation"), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure top-down statistics"), OPT_UINTEGER(0, "td-level", &stat_config.topdown_level, -- cgit v1.2.3 From bb6ec2e9fd8b83b2db68a449754c899a211bf84b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 23 Oct 2023 19:42:45 +0100 Subject: tools/nolibc: Use linux/wait.h rather than duplicating it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux defines a few custom flags for waitpid() which aren't currently provided by nolibc, make them available to nolibc based programs by just including linux/wait.h where they are defined instead of defining our own copy of the flags. Signed-off-by: Mark Brown Signed-off-by: Willy Tarreau Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/types.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 8cfc4c860fa4..ad0ddaa89e50 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -12,6 +12,7 @@ #include /* for LINUX_REBOOT_* */ #include #include +#include /* Only the generic macros and types may be defined here. The arch-specific @@ -108,9 +109,6 @@ #define WTERMSIG(status) ((status) & 0x7f) #define WIFSIGNALED(status) ((status) - 1 < 0xff) -/* waitpid() flags */ -#define WNOHANG 1 - /* standard exit() codes */ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 -- cgit v1.2.3 From bdeeeaba83682225a7bf5f100fe8652a59590d33 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 31 Oct 2023 21:36:58 +0100 Subject: selftests/nolibc: use EFI -bios for LoongArch qemu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qemu for LoongArch does not work properly with direct kernel boot. The kernel will panic during initialization and hang without any output. When booting in EFI mode everything work correctly. While users most likely don't have the LoongArch EFI binary installed at least an explicit error about 'file not found' is better than a hanging test without output that can never succeed. Link: https://lore.kernel.org/loongarch/1738d60a-df3a-4102-b1da-d16a29b6e06a@t-8ch.de/ Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20231031-nolibc-out-of-tree-v1-1-47c92f73590a@weissschuh.net --- tools/testing/selftests/nolibc/Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 6c7040a75d81..28e616d64612 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -88,6 +88,13 @@ QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) +QEMU_BIOS_DIR = /usr/share/edk2/ +QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd + +ifneq ($(QEMU_BIOS_$(XARCH)),) +QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) +endif + # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -101,7 +108,7 @@ QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_EXTRA) +QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory. -- cgit v1.2.3 From 7263c9d9b67a9412fcfc2c90b259a28d55d0e970 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 31 Oct 2023 21:36:59 +0100 Subject: selftests/nolibc: anchor paths in $(srcdir) if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is easier to recognize paths from their well-known location in the source tree than having to resolve the relative path in ones head. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20231031-nolibc-out-of-tree-v1-2-47c92f73590a@weissschuh.net --- tools/testing/selftests/nolibc/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 28e616d64612..be89a4fe0e23 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -174,7 +174,7 @@ sysroot: sysroot/$(ARCH)/include sysroot/$(ARCH)/include: $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot - $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone + $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) ifneq ($(NOLIBC_SYSROOT),0) @@ -184,7 +184,7 @@ nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include else nolibc-test: nolibc-test.c nolibc-test-linkage.c $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include ../../../include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc + -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc endif libc-test: nolibc-test.c nolibc-test-linkage.c -- cgit v1.2.3 From 69620b3a5bc5e6798724ab9bf0dd1b3c980a4949 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 31 Oct 2023 21:37:00 +0100 Subject: selftests/nolibc: support out-of-tree builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Out of tree builds are much more convenient when building for multiple architectures or configurations in parallel. Only absolute O= parameters are supported as Makefile.include will always resolve relative paths in relation to $(srctree) instead of the current directory. Add a call to "make outputmakefile" to verify that the sourcetree is clean. This is based on Zhangjins out-of-tree patch. It extends that work for get_init_cpio support and also drops relative O= specifications explicitly. Link: https://lore.kernel.org/lkml/06d96bd81fe812a9718098a383678ad3beba98b1.1691215074.git.falcon@tinylab.org/ Co-developed-by: Zhangjin Wu Signed-off-by: Zhangjin Wu Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20231031-nolibc-out-of-tree-v1-3-47c92f73590a@weissschuh.net --- tools/testing/selftests/nolibc/Makefile | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index be89a4fe0e23..4818ae4bdff4 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -1,9 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for nolibc tests include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak # We need this for the "cc-option" macro. include ../../../build/Build.include +ifneq ($(O),) +ifneq ($(call is-absolute,$(O)),y) +$(error Only absolute O= parameters are supported) +endif +endif + # we're in ".../tools/testing/selftests/nolibc" ifeq ($(srctree),) srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) @@ -14,6 +21,8 @@ include $(srctree)/scripts/subarch.include ARCH = $(SUBARCH) endif +objtree ?= $(srctree) + # XARCH extends the kernel's ARCH with a few variants of the same # architecture that only differ by the configuration, the toolchain # and the Qemu program used. It is copied as-is into ARCH except for @@ -52,7 +61,7 @@ IMAGE_ppc64le = arch/powerpc/boot/zImage IMAGE_riscv = arch/riscv/boot/Image IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi -IMAGE = $(IMAGE_$(XARCH)) +IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) # default kernel configurations that appear to be usable @@ -174,6 +183,7 @@ sysroot: sysroot/$(ARCH)/include sysroot/$(ARCH)/include: $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot + $(Q)$(MAKE) -C $(srctree) outputmakefile $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) @@ -206,7 +216,7 @@ run-user: nolibc-test $(Q)$(REPORT) $(CURDIR)/run.out initramfs.cpio: kernel nolibc-test - $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(srctree)/usr/gen_init_cpio - > initramfs.cpio + $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio initramfs: nolibc-test $(QUIET_MKDIR)mkdir -p initramfs @@ -224,12 +234,12 @@ kernel-standalone: initramfs # run the tests after building the kernel run: kernel initramfs.cpio - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # re-run the tests from an existing kernel rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # report with existing test log -- cgit v1.2.3 From 91f16451593b4709036e72a6aaccadc16d87a339 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 5 Nov 2023 10:23:05 +0100 Subject: selftests/nolibc: add script to run testsuite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The script can run the testsuite for multiple architectures and provides an overall test report. Furthermore it can automatically download crosstools from mirrors.kernel.org if requested by the user. Example execution: $ ./run-tests.sh i386: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success x86_64: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success arm64: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success arm: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success mips: 162 test(s): 161 passed, 1 skipped, 0 failed => status: warning ppc: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success ppc64: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success ppc64le: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success riscv: 162 test(s): 162 passed, 0 skipped, 0 failed => status: success s390: 162 test(s): 161 passed, 1 skipped, 0 failed => status: warning loongarch: 162 test(s): 161 passed, 1 skipped, 0 failed => status: warning Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20231105-nolibc-run-tests-v1-1-b59ff770a978@weissschuh.net --- tools/testing/selftests/nolibc/.gitignore | 1 + tools/testing/selftests/nolibc/run-tests.sh | 153 ++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100755 tools/testing/selftests/nolibc/run-tests.sh (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore index 5119f9f7afd2..35d247a0d5bd 100644 --- a/tools/testing/selftests/nolibc/.gitignore +++ b/tools/testing/selftests/nolibc/.gitignore @@ -3,4 +3,5 @@ /libc-test /nolibc-test /run.out +/run.out.* /sysroot/ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh new file mode 100755 index 000000000000..1bf020d49f54 --- /dev/null +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -0,0 +1,153 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test runner for nolibc tests + +set -e + +trap 'echo Aborting...' 'ERR' + +crosstool_version=13.2.0 +hostarch=x86_64 +nproc=$(( $(nproc) + 2)) +cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}" +download_location="${cache_dir}/crosstools/" +build_location="$(realpath "${cache_dir}"/nolibc-tests/)" +perform_download=0 +archs="i386 x86_64 arm64 arm mips ppc ppc64 ppc64le riscv s390 loongarch" + +TEMP=$(getopt -o 'j:d:c:b:a:ph' -n "$0" -- "$@") + +eval set -- "$TEMP" +unset TEMP + +print_usage() { + cat < + +Known architectures: + ${archs} + +Options: + -j [N] Allow N jobs at once (default: ${nproc}) + -p Allow download of toolchains + -d [DIR] Download location for toolchains (default: ${download_location}) + -c [VERSION] Version of toolchains to use (default: ${crosstool_version}) + -a [ARCH] Host architecture of toolchains to use (default: ${hostarch}) + -b [DIR] Build location (default: ${build_location}) +EOF +} + +while true; do + case "$1" in + '-j') + nproc="$2" + shift 2; continue ;; + '-p') + perform_download=1 + shift; continue ;; + '-d') + download_location="$2" + shift 2; continue ;; + '-c') + crosstool_version="$2" + shift 2; continue ;; + '-a') + hostarch="$2" + shift 2; continue ;; + '-b') + build_location="$(realpath "$2")" + shift 2; continue ;; + '-h') + print_usage + exit 0 + ;; + '--') + shift; break ;; + *) + echo 'Internal error!' >&2; exit 1 ;; + esac +done + +if [[ -n "$*" ]]; then + archs="$*" +fi + +crosstool_arch() { + case "$1" in + arm64) echo aarch64;; + ppc) echo powerpc;; + ppc64) echo powerpc64;; + ppc64le) echo powerpc64;; + riscv) echo riscv64;; + loongarch) echo loongarch64;; + mips*) echo mips;; + *) echo "$1";; + esac +} + +crosstool_abi() { + case "$1" in + arm) echo linux-gnueabi;; + *) echo linux;; + esac +} + +download_crosstool() { + arch="$(crosstool_arch "$1")" + abi="$(crosstool_abi "$1")" + + archive_name="${hostarch}-gcc-${crosstool_version}-nolibc-${arch}-${abi}.tar.gz" + url="https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/${hostarch}/${crosstool_version}/${archive_name}" + archive="${download_location}${archive_name}" + stamp="${archive}.stamp" + + [ -f "${stamp}" ] && return + + echo "Downloading crosstools ${arch} ${crosstool_version}" + mkdir -p "${download_location}" + curl -o "${archive}" --fail --continue-at - "${url}" + tar -C "${download_location}" -xf "${archive}" + touch "${stamp}" +} + +# capture command output, print it on failure +# mimics chronic(1) from moreutils +function swallow_output() { + if ! OUTPUT="$("$@" 2>&1)"; then + echo "$OUTPUT" + return 1 + fi + return 0 +} + +test_arch() { + arch=$1 + ct_arch=$(crosstool_arch "$arch") + ct_abi=$(crosstool_abi "$1") + cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-") + build_dir="${build_location}/${arch}" + MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}") + + mkdir -p "$build_dir" + if [ ! -f "${build_dir}/.config" ]; then + swallow_output "${MAKE[@]}" defconfig + fi + printf '%-15s' "$arch:" + swallow_output "${MAKE[@]}" run V=1 + cp run.out run.out."${arch}" + "${MAKE[@]}" report | grep passed +} + +if [ "$perform_download" -ne 0 ]; then + for arch in $archs; do + download_crosstool "$arch" + done +fi + +for arch in $archs; do + test_arch "$arch" +done -- cgit v1.2.3 From 48946c5aa7a848c7dfc2151267af92956f492f58 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 5 Nov 2023 11:07:05 +0100 Subject: tools/nolibc: error out on unsupported architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an architecture is unsupported arch.h would silently continue. This leads to a lot of followup errors because my_syscallX() is not defined and the startup code is missing. Avoid these confusing errors and fail the build early with a clear error message and location. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/arch.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index e276fb0680af..2f72ccac0378 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -33,6 +33,8 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#else +#error Unsupported Architecture #endif #endif /* _NOLIBC_ARCH_H */ -- cgit v1.2.3 From aa68a5a83a0acc4c1babcb4f8be49261514ab65c Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Oct 2023 09:30:33 +0200 Subject: tools/nolibc: move MIPS ABI validation into arch-mips.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When installing nolibc to a sysroot arch.h is not used so its ABI check is bypassed. This makes is possible to compile nolibc with a non O32 ABI which may build but can not run. Move the check into arch-mips.h so it will always be evaluated. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/arch-mips.h | 4 ++++ tools/include/nolibc/arch.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 4ab6fa54beee..3a2c76716b83 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,6 +10,10 @@ #include "compiler.h" #include "crt.h" +#if !defined(_ABIO32) +#error Unsupported MIPS ABI +#endif + /* Syscalls for MIPS ABI O32 : * - WARNING! there's always a delayed slot! * - WARNING again, the syntax is different, registers take a '$' and numbers diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index 2f72ccac0378..c8f4e5d3add9 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -23,7 +23,7 @@ #include "arch-arm.h" #elif defined(__aarch64__) #include "arch-aarch64.h" -#elif defined(__mips__) && defined(_ABIO32) +#elif defined(__mips__) #include "arch-mips.h" #elif defined(__powerpc__) #include "arch-powerpc.h" -- cgit v1.2.3 From c4c20a7d6ef9d5a4330a63c1fd4553dac5f93c04 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Oct 2023 09:36:18 +0200 Subject: selftests/nolibc: use XARCH for MIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPS has many different configurations prepare the support of additional ones by moving the build of MIPS to the generic XARCH infrastructure. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/testing/selftests/nolibc/Makefile | 12 +++++++----- tools/testing/selftests/nolibc/run-tests.sh | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 4818ae4bdff4..07c94df50dd1 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -40,12 +40,14 @@ objtree ?= $(srctree) # configure default variants for target kernel supported architectures XARCH_powerpc = ppc +XARCH_mips = mips32le XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) # map from user input variants to their kernel supported architectures ARCH_ppc = powerpc ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc +ARCH_mips32le = mips ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -54,7 +56,7 @@ IMAGE_x86_64 = arch/x86/boot/bzImage IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage -IMAGE_mips = vmlinuz +IMAGE_mips32le = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage @@ -70,7 +72,7 @@ DEFCONFIG_x86_64 = defconfig DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig -DEFCONFIG_mips = malta_defconfig +DEFCONFIG_mips32le = malta_defconfig DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -88,7 +90,7 @@ QEMU_ARCH_x86_64 = x86_64 QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm -QEMU_ARCH_mips = mipsel # works with malta_defconfig +QEMU_ARCH_mips32le = mipsel # works with malta_defconfig QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 @@ -110,7 +112,7 @@ QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $( QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -134,7 +136,7 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 -CFLAGS_mips = -EL +CFLAGS_mips32le = -EL CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 1bf020d49f54..8f2c3bc572cb 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -14,7 +14,7 @@ cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}" download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 -archs="i386 x86_64 arm64 arm mips ppc ppc64 ppc64le riscv s390 loongarch" +archs="i386 x86_64 arm64 arm mips32le ppc ppc64 ppc64le riscv s390 loongarch" TEMP=$(getopt -o 'j:d:c:b:a:ph' -n "$0" -- "$@") -- cgit v1.2.3 From bb503f5f01546c65fc510787b2964de3b62b6646 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Oct 2023 13:39:39 +0200 Subject: selftests/nolibc: explicitly specify ABI for MIPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More ABIs exist, for better clarity specify it explicitly everywhere. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/testing/selftests/nolibc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 07c94df50dd1..6c7bc6ad3387 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -136,7 +136,7 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 -CFLAGS_mips32le = -EL +CFLAGS_mips32le = -EL -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ -- cgit v1.2.3 From 3ab1e9db098a41dcfc0d93ae964bd5901e4ef1b2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Oct 2023 13:34:27 +0200 Subject: selftests/nolibc: extraconfig support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow some postprocessing of defconfig files. Suggested-by: Zhangjin Wu Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/testing/selftests/nolibc/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 6c7bc6ad3387..4983718866b6 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -81,6 +81,8 @@ DEFCONFIG_s390 = defconfig DEFCONFIG_loongarch = defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) + # optional tests to run (default = all) TEST = @@ -227,6 +229,10 @@ initramfs: nolibc-test defconfig: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare + $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ + $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ + $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + fi kernel: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null -- cgit v1.2.3 From b4b9fb91da99035ce59ac74c9a27562afddfc21d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 20 Oct 2023 11:04:10 +0200 Subject: selftests/nolibc: add configuration for mipso32be MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow testing MIPS O32 big endian. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/testing/selftests/nolibc/Makefile | 7 +++++++ tools/testing/selftests/nolibc/run-tests.sh | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 4983718866b6..546859c9f7ac 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -48,6 +48,7 @@ ARCH_ppc = powerpc ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc ARCH_mips32le = mips +ARCH_mips32be = mips ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -57,6 +58,7 @@ IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage IMAGE_mips32le = vmlinuz +IMAGE_mips32be = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage @@ -73,6 +75,7 @@ DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_mips32le = malta_defconfig +DEFCONFIG_mips32be = malta_defconfig DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -81,6 +84,7 @@ DEFCONFIG_s390 = defconfig DEFCONFIG_loongarch = defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) # optional tests to run (default = all) @@ -93,6 +97,7 @@ QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm QEMU_ARCH_mips32le = mipsel # works with malta_defconfig +QEMU_ARCH_mips32be = mips QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 QEMU_ARCH_ppc64le = ppc64 @@ -115,6 +120,7 @@ QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $( QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -139,6 +145,7 @@ CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 CFLAGS_mips32le = -EL -mabi=32 +CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 8f2c3bc572cb..3a1eaccfbd8d 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -14,7 +14,7 @@ cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}" download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 -archs="i386 x86_64 arm64 arm mips32le ppc ppc64 ppc64le riscv s390 loongarch" +archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" TEMP=$(getopt -o 'j:d:c:b:a:ph' -n "$0" -- "$@") -- cgit v1.2.3 From 07f679b50252dc9e3d0c19aca5801f82c230c527 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 5 Nov 2023 15:16:38 +0100 Subject: selftests/nolibc: fix testcase status alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Center-align all possible status reports. Before OK and FAIL were center-aligned in relation to each other but SKIPPED and FAILED would be left-aligned. Before: 7 environ_addr = <0x7fffef3e7c50> [OK] 8 environ_envp = <0x7fffef3e7c58> [FAIL] 9 environ_auxv [SKIPPED] 10 environ_total [SKIPPED] 11 environ_HOME = <0x7fffef3e99bd> [OK] 12 auxv_addr [SKIPPED] 13 auxv_AT_UID = 1000 [OK] After: 7 environ_addr = <0x7ffff13b00a0> [OK] 8 environ_envp = <0x7ffff13b00a8> [FAIL] 9 environ_auxv [SKIPPED] 10 environ_total [SKIPPED] 11 environ_HOME = <0x7ffff13b19bd> [OK] 12 auxv_addr [SKIPPED] 13 auxv_AT_UID = 1000 [OK] Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 2f10541e6f38..e173014f6b66 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -150,11 +150,11 @@ static void result(int llen, enum RESULT r) const char *msg; if (r == OK) - msg = " [OK]"; + msg = " [OK]"; else if (r == SKIPPED) msg = "[SKIPPED]"; else - msg = "[FAIL]"; + msg = " [FAIL]"; if (llen < 64) putcharn(' ', 64 - llen); -- cgit v1.2.3 From d7233e2b758b927695e63e078fe55abcc6ecd3a2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 15 Sep 2077 02:13:52 +0200 Subject: selftests/nolibc: introduce QEMU_ARCH_USER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While ppc64le shares the same executable with regular ppc64 the user variant needs has a dedicated executable. Introduce a new QEMU_ARCH_USER Makefile variable to accommodate that. Fixes: 17362f3d0bd3 ("selftests/nolibc: use qemu-system-ppc64 for ppc64le") Link: https://lore.kernel.org/r/20770915-nolibc-run-user-v1-1-3caec61726dc@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 546859c9f7ac..47f0fb52fe55 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -106,6 +106,9 @@ QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) +QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) + QEMU_BIOS_DIR = /usr/share/edk2/ QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd @@ -223,7 +226,7 @@ run-nolibc-test: nolibc-test # qemu user-land test run-user: nolibc-test - $(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : $(Q)$(REPORT) $(CURDIR)/run.out initramfs.cpio: kernel nolibc-test -- cgit v1.2.3 From 8bcf9a485541fe0079483162496db1add932689b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 15 Sep 2077 02:13:53 +0200 Subject: selftests/nolibc: run-tests.sh: enable testing via qemu-user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qemu-user is faster than a full system test. Link: https://lore.kernel.org/r/20770915-nolibc-run-user-v1-2-3caec61726dc@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/run-tests.sh | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 3a1eaccfbd8d..c0a5a7cea9fa 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -14,9 +14,10 @@ cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}" download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 +test_mode=system archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" -TEMP=$(getopt -o 'j:d:c:b:a:ph' -n "$0" -- "$@") +TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@") eval set -- "$TEMP" unset TEMP @@ -38,6 +39,7 @@ Options: -c [VERSION] Version of toolchains to use (default: ${crosstool_version}) -a [ARCH] Host architecture of toolchains to use (default: ${hostarch}) -b [DIR] Build location (default: ${build_location}) + -m [MODE] Test mode user/system (default: ${test_mode}) EOF } @@ -61,6 +63,9 @@ while true; do '-b') build_location="$(realpath "$2")" shift 2; continue ;; + '-m') + test_mode="$2" + shift 2; continue ;; '-h') print_usage exit 0 @@ -133,11 +138,22 @@ test_arch() { MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}") mkdir -p "$build_dir" - if [ ! -f "${build_dir}/.config" ]; then + if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then swallow_output "${MAKE[@]}" defconfig fi + case "$test_mode" in + 'system') + test_target=run + ;; + 'user') + test_target=run-user + ;; + *) + echo "Unknown mode $test_mode" + exit 1 + esac printf '%-15s' "$arch:" - swallow_output "${MAKE[@]}" run V=1 + swallow_output "${MAKE[@]}" "$test_target" V=1 cp run.out run.out."${arch}" "${MAKE[@]}" report | grep passed } -- cgit v1.2.3 From 544102458a8d1c33f9f5f99f9bda8e2b858bcb10 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 8 Nov 2023 19:14:43 +0100 Subject: tools/nolibc: mips: add support for PIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPS requires some extra instructions to set up the $gp register for the with a pointer to the global data area. This isn't needed for non-PIC builds, but this patch enables the code unconditionally to prevent bitrot. Also enable PIC in one of the test configurations for ongoing validation. Link: https://lore.kernel.org/r/20231108-nolibc-pic-v2-1-4fb0d6284757@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-mips.h | 7 ++++++- tools/testing/selftests/nolibc/Makefile | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 3a2c76716b83..62cc50ef3288 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -184,8 +184,13 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ __asm__ volatile ( ".set push\n" ".set noreorder\n" - ".option pic0\n" + "bal 1f\n" /* prime $ra for .cpload */ + "nop\n" + "1:\n" + ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ + "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ + ".cprestore 0\n" "li $t0, -8\n" "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 47f0fb52fe55..40dd95228051 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -147,7 +147,7 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 -CFLAGS_mips32le = -EL -mabi=32 +CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ -- cgit v1.2.3 From b9e64724cd8aeca9e7ab4523a92ccf2ba0cd1de2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Nov 2023 08:27:59 +0100 Subject: selftests/nolibc: make result alignment more robust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the check of the existing length into the function so it can't be forgotten by the caller. Also hardcode the padding character as only spaces are ever used. Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index e173014f6b66..2b71fb5fae4e 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -130,11 +130,17 @@ static const char *errorname(int err) } } -static void putcharn(char c, size_t n) +static void align_result(size_t llen) { - char buf[64]; + const size_t align = 64; + char buf[align]; + size_t n; - memset(buf, c, n); + if (llen >= align) + return; + + n = align - llen; + memset(buf, ' ', n); buf[n] = '\0'; fputs(buf, stdout); } @@ -156,8 +162,7 @@ static void result(int llen, enum RESULT r) else msg = " [FAIL]"; - if (llen < 64) - putcharn(' ', 64 - llen); + align_result(llen); puts(msg); } -- cgit v1.2.3 From dece8476d6dda087cacb8e105bb70e02a9ef9387 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 23 Nov 2023 22:53:13 +0100 Subject: tools/nolibc: annotate va_list printf formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __attribute__(format(printf)) can also be used for functions that take a va_list argument. As per the GCC docs: For functions where the arguments are not available to be checked (such as vprintf), specify the third parameter as zero. Link: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index d7ef43973916..16cd4d807251 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -212,7 +212,7 @@ char *fgets(char *s, int size, FILE *stream) * - %s * - unknown modifiers are ignored. */ -static __attribute__((unused)) +static __attribute__((unused, format(printf, 2, 0))) int vfprintf(FILE *stream, const char *fmt, va_list args) { char escape, lpref, c; @@ -318,7 +318,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) return written; } -static __attribute__((unused)) +static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { return vfprintf(stdout, fmt, args); -- cgit v1.2.3 From 825f404776b4f9d5f4a35545ea2d258bb16c0d4e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 25 Nov 2023 11:54:02 +0100 Subject: tools/nolibc: drop duplicated testcase ioctl_tiocinq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same testcase is present on the line above. Fixes: b4844fa0bdb4 ("selftests/nolibc: implement a few tests for various syscalls") Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/nolibc-test.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 2b71fb5fae4e..783c2a97c4e3 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -910,7 +910,6 @@ int run_syscall(int min, int max) CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; - CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break; CASE_TEST(link_dir); EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break; -- cgit v1.2.3 From 7b20478b777c3be39a2b69b08a6c0b50c10105f1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Nov 2023 23:22:52 +0100 Subject: tools/nolibc: drop custom definition of struct rusage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A future commit will include linux/resource.h, which will conflict with the private definition of struct rusage in nolibc. Avoid the conflict by dropping the private definition and use the one from the UAPI headers. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/20231123-nolibc-rlimit-v1-1-a428b131de2a@weissschuh.net/ Acked-by: Willy Tarreau --- tools/include/nolibc/types.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index ad0ddaa89e50..b26a5d0c417c 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -13,6 +13,7 @@ #include #include #include +#include /* Only the generic macros and types may be defined here. The arch-specific @@ -178,26 +179,6 @@ struct linux_dirent64 { char d_name[]; }; -/* needed by wait4() */ -struct rusage { - struct timeval ru_utime; - struct timeval ru_stime; - long ru_maxrss; - long ru_ixrss; - long ru_idrss; - long ru_isrss; - long ru_minflt; - long ru_majflt; - long ru_nswap; - long ru_inblock; - long ru_oublock; - long ru_msgsnd; - long ru_msgrcv; - long ru_nsignals; - long ru_nvcsw; - long ru_nivcsw; -}; - /* The format of the struct as returned by the libc to the application, which * significantly differs from the format returned by the stat() syscall flavours. */ -- cgit v1.2.3 From a0bb5f88fc3d72bc92c24a631f2c7794362efac1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Nov 2023 23:49:43 +0100 Subject: tools/nolibc: add support for getrlimit/setrlimit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The implementation uses the prlimit64 systemcall as that is available on all architectures. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/20231123-nolibc-rlimit-v1-2-a428b131de2a@weissschuh.net/ Acked-by: Willy Tarreau --- tools/include/nolibc/sys.h | 38 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 29 +++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 2f359cb03d10..dda9dffd1d74 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -21,6 +21,7 @@ #include /* for O_* and AT_* */ #include /* for statx() */ #include +#include #include "arch.h" #include "errno.h" @@ -898,6 +899,43 @@ int reboot(int cmd) } +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + + /* * int sched_yield(void); */ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 783c2a97c4e3..a0271ac313ee 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -839,6 +840,33 @@ int test_pipe(void) return !!memcmp(buf, msg, len); } +int test_rlimit(void) +{ + struct rlimit rlim = { + .rlim_cur = 1 << 20, + .rlim_max = 1 << 21, + }; + int ret; + + ret = setrlimit(RLIMIT_CORE, &rlim); + if (ret) + return -1; + + rlim.rlim_cur = 0; + rlim.rlim_max = 0; + + ret = getrlimit(RLIMIT_CORE, &rlim); + if (ret) + return -1; + + if (rlim.rlim_cur != 1 << 20) + return -1; + if (rlim.rlim_max != 1 << 21) + return -1; + + return 0; +} + /* Run syscall tests between IDs and . * Return 0 on success, non-zero on failure. @@ -928,6 +956,7 @@ int run_syscall(int min, int max) CASE_TEST(poll_fault); EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break; CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break; CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(rlimit); EXPECT_SYSZR(1, test_rlimit()); break; CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break; CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break; CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break; -- cgit v1.2.3 From d543d9ddf593b1f4cb1d57d9ac0ad279fe18adaf Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 22 Nov 2023 23:49:52 +0100 Subject: selftests/nolibc: disable coredump via setrlimit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qemu-user does has its own implementation of coredumping. That implementation does not respect the call to prctl(PR_SET_DUMPABLE, 0) in run_protection(). This leads to a coredump for every test run under qemu-user. Use also setrlimit() to inhibit coredump creation which is respected by qemu-user. Link: https://lore.kernel.org/qemu-devel/20231115-qemu-user-dumpable-v1-2-edbe7f0fbb02@t-8ch.de/ Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/20231123-nolibc-rlimit-v1-3-a428b131de2a@weissschuh.net/ Acked-by: Willy Tarreau --- tools/testing/selftests/nolibc/nolibc-test.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a0271ac313ee..6ba4f8275ac4 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1166,6 +1166,7 @@ static int run_protection(int min __attribute__((unused)), { pid_t pid; int llen = 0, status; + struct rlimit rlimit = { 0, 0 }; llen += printf("0 -fstackprotector "); @@ -1197,6 +1198,7 @@ static int run_protection(int min __attribute__((unused)), close(STDERR_FILENO); prctl(PR_SET_DUMPABLE, 0, 0, 0, 0); + setrlimit(RLIMIT_CORE, &rlimit); smash_stack(); return 1; -- cgit v1.2.3 From e72c1ccfd449598f7eda10d3bb7441d501ddcfc3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Dec 2023 09:41:31 -0800 Subject: selftests/bpf: validate eliminated global subprog is not freplaceable Add selftest that establishes dead code-eliminated valid global subprog (global_dead) and makes sure that it's not possible to freplace it, as it's effectively not there. This test will fail with unexpected success before 2afae08c9dcb ("bpf: Validate global subprogs lazily"). v2->v3: - add missing err assignment (Alan); - undo unnecessary signature changes in verifier_global_subprogs.c (Eduard); v1->v2: - don't rely on assembly output in verifier log, which changes between compiler versions (CI). Acked-by: Eduard Zingerman Reviewed-by: Alan Maguire Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20231211174131.2324306-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/global_func_dead_code.c | 60 ++++++++++++++++++++++ .../bpf/progs/freplace_dead_global_func.c | 11 ++++ .../selftests/bpf/progs/verifier_global_subprogs.c | 15 ++++-- 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c create mode 100644 tools/testing/selftests/bpf/progs/freplace_dead_global_func.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c new file mode 100644 index 000000000000..65309894b27a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include "verifier_global_subprogs.skel.h" +#include "freplace_dead_global_func.skel.h" + +void test_global_func_dead_code(void) +{ + struct verifier_global_subprogs *tgt_skel = NULL; + struct freplace_dead_global_func *skel = NULL; + char log_buf[4096]; + int err, tgt_fd; + + /* first, try to load target with good global subprog */ + tgt_skel = verifier_global_subprogs__open(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open")) + return; + + bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true); + + err = verifier_global_subprogs__load(tgt_skel); + if (!ASSERT_OK(err, "tgt_skel_good_load")) + goto out; + + tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success); + + /* Attach to good non-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_good_open")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good"); + ASSERT_OK(err, "attach_target_good"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_OK(err, "skel_good_load")) + goto out; + + freplace_dead_global_func__destroy(skel); + + /* Try attaching to dead code-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_dead_open")) + goto out; + + bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf)); + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead"); + ASSERT_OK(err, "attach_target_dead"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_ERR(err, "skel_dead_load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg"); + +out: + verifier_global_subprogs__destroy(tgt_skel); + freplace_dead_global_func__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c new file mode 100644 index 000000000000..e6a75f86cac6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +SEC("freplace") +int freplace_prog(void) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index a0a5efd1caa1..bd696a431244 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -10,6 +10,7 @@ int arr[1]; int unkn_idx; +const volatile bool call_dead_subprog = false; __noinline long global_bad(void) { @@ -31,23 +32,31 @@ __noinline long global_calls_good_only(void) return global_good(); } +__noinline long global_dead(void) +{ + return arr[0] * 2; +} + SEC("?raw_tp") __success __log_level(2) /* main prog is validated completely first */ __msg("('global_calls_good_only') is global and assumed valid.") -__msg("1: (95) exit") /* eventually global_good() is transitively validated as well */ __msg("Validating global_good() func") __msg("('global_good') is safe for any args that match its prototype") int chained_global_func_calls_success(void) { - return global_calls_good_only(); + int sum = 0; + + if (call_dead_subprog) + sum += global_dead(); + return global_calls_good_only() + sum; } SEC("?raw_tp") __failure __log_level(2) /* main prog validated successfully first */ -__msg("1: (95) exit") +__msg("('global_calls_bad') is global and assumed valid.") /* eventually we validate global_bad() and fail */ __msg("Validating global_bad() func") __msg("math between map_value pointer and register") /* BOOM */ -- cgit v1.2.3 From c7b98bf0fc79bd2d91f6ef84e07b5f648d43c13e Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 11 Dec 2023 16:13:20 +0000 Subject: KVM: selftests: aarch64: Update tools copy of arm_pmuv3.h Now that ARMV8_PMU_PMCR_N is made with GENMASK, update usages to treat it as a pre-shifted mask. Signed-off-by: James Clark Link: https://lore.kernel.org/r/20231211161331.1277825-9-james.clark@arm.com Signed-off-by: Will Deacon --- tools/include/perf/arm_pmuv3.h | 43 +++++++++++++--------- .../selftests/kvm/aarch64/vpmu_counter_access.c | 5 +-- 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/include/perf/arm_pmuv3.h b/tools/include/perf/arm_pmuv3.h index e822d49fb5b8..1e397d55384e 100644 --- a/tools/include/perf/arm_pmuv3.h +++ b/tools/include/perf/arm_pmuv3.h @@ -218,45 +218,54 @@ #define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */ #define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */ -#define ARMV8_PMU_PMCR_N_SHIFT 11 /* Number of counters supported */ -#define ARMV8_PMU_PMCR_N_MASK 0x1f -#define ARMV8_PMU_PMCR_MASK 0xff /* Mask for writable bits */ +#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */ +/* Mask for writable bits */ +#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \ + ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \ + ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \ + ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP) /* * PMOVSR: counters overflow flag status reg */ -#define ARMV8_PMU_OVSR_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV8_PMU_OVERFLOWED_MASK ARMV8_PMU_OVSR_MASK +#define ARMV8_PMU_OVSR_P GENMASK(30, 0) +#define ARMV8_PMU_OVSR_C BIT(31) +/* Mask for writable bits is both P and C fields */ +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ -#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */ +#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32) +#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61) /* * Event filters for PMUv3 */ -#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) -#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) -#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31) +#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30) +#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29) +#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28) +#define ARMV8_PMU_INCLUDE_EL2 (1U << 27) +#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26) /* * PMUSERENR: user enable reg */ -#define ARMV8_PMU_USERENR_MASK 0xf /* Mask for writable bits */ #define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +/* Mask for writable bits */ +#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ + ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) /* PMMIR_EL1.SLOTS mask */ -#define ARMV8_PMU_SLOTS_MASK 0xff - -#define ARMV8_PMU_BUS_SLOTS_SHIFT 8 -#define ARMV8_PMU_BUS_SLOTS_MASK 0xff -#define ARMV8_PMU_BUS_WIDTH_SHIFT 16 -#define ARMV8_PMU_BUS_WIDTH_MASK 0xf +#define ARMV8_PMU_SLOTS GENMASK(7, 0) +#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8) +#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16) +#define ARMV8_PMU_THWIDTH GENMASK(23, 20) /* * This code is really good diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index 5ea78986e665..9d51b5691349 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -42,13 +42,12 @@ struct pmreg_sets { static uint64_t get_pmcr_n(uint64_t pmcr) { - return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) { - *pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); - *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT); + u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); } static uint64_t get_counters_mask(uint64_t n) -- cgit v1.2.3 From 232afb557835d6f6859c73bf610bad308c96b131 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 2 Dec 2023 12:50:23 +0100 Subject: x86/CPU/AMD: Add X86_FEATURE_ZEN1 Add a synthetic feature flag specifically for first generation Zen machines. There's need to have a generic flag for all Zen generations so make X86_FEATURE_ZEN be that flag. Fixes: 30fa92832f40 ("x86/CPU/AMD: Add ZenX generations flags") Suggested-by: Brian Gerst Suggested-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/dc3835e3-0731-4230-bbb9-336bbe3d042b@amd.com --- arch/x86/include/asm/cpufeatures.h | 3 ++- arch/x86/kernel/cpu/amd.c | 11 ++++++----- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 149cc5d5c2ae..632c26cdeeda 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -218,7 +218,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ @@ -315,6 +315,7 @@ #define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ #define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ #define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 89bbb1ac9ecf..339586394e7e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -542,7 +542,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x50 ... 0x5f: - setup_force_cpu_cap(X86_FEATURE_ZEN); + setup_force_cpu_cap(X86_FEATURE_ZEN1); break; case 0x30 ... 0x4f: case 0x60 ... 0x7f: @@ -948,12 +948,13 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) static void init_amd_zen_common(void) { + setup_force_cpu_cap(X86_FEATURE_ZEN); #ifdef CONFIG_NUMA node_reclaim_distance = 32; #endif } -static void init_amd_zen(struct cpuinfo_x86 *c) +static void init_amd_zen1(struct cpuinfo_x86 *c) { init_amd_zen_common(); fix_erratum_1386(c); @@ -1075,8 +1076,8 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x16: init_amd_jg(c); break; } - if (boot_cpu_has(X86_FEATURE_ZEN)) - init_amd_zen(c); + if (boot_cpu_has(X86_FEATURE_ZEN1)) + init_amd_zen1(c); else if (boot_cpu_has(X86_FEATURE_ZEN2)) init_amd_zen2(c); else if (boot_cpu_has(X86_FEATURE_ZEN3)) @@ -1143,7 +1144,7 @@ static void init_amd(struct cpuinfo_x86 *c) * Counter May Be Inaccurate". */ if (cpu_has(c, X86_FEATURE_IRPERF) && - (boot_cpu_has(X86_FEATURE_ZEN) && c->x86_model > 0x2f)) + (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 798e60b5454b..845a4023ba44 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -219,7 +219,7 @@ #define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ #define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ #define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ -- cgit v1.2.3 From 0c12e6c8267f831e491ee64ac6f216601cea3eee Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 12 Dec 2023 07:04:32 +0000 Subject: KVM: selftests: Ensure sysreg-defs.h is generated at the expected path Building the KVM selftests from the main selftests Makefile (as opposed to the kvm subdirectory) doesn't work as OUTPUT is set, forcing the generated header to spill into the selftests directory. Additionally, relative paths do not work when building outside of the srctree, as the canonical selftests path is replaced with 'kselftest' in the output. Work around both of these issues by explicitly overriding OUTPUT on the submake cmdline. Move the whole fragment below the point lib.mk gets included such that $(abs_objdir) is available. Reviewed-by: Cornelia Huck Tested-by: Mark Brown Link: https://lore.kernel.org/r/20231212070431.145544-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 52c59bad7213..5f4f6be7a866 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -17,16 +17,6 @@ else ARCH_DIR := $(ARCH) endif -ifeq ($(ARCH),arm64) -tools_dir := $(top_srcdir)/tools -arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ -GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/ -CFLAGS += -I$(GEN_HDRS) - -$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) - $(MAKE) -C $(arm64_tools_dir) O=$(tools_dir) -endif - LIBKVM += lib/assert.c LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c @@ -234,6 +224,22 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ ifeq ($(ARCH),s390) CFLAGS += -march=z10 endif +ifeq ($(ARCH),arm64) +tools_dir := $(top_srcdir)/tools +arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ + +ifneq ($(abs_objdir),) +arm64_hdr_outdir := $(abs_objdir)/tools/ +else +arm64_hdr_outdir := $(tools_dir)/ +endif + +GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir) +endif no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) -- cgit v1.2.3 From 48219b089d84f109e8a81d8a7fa1bbc2e6e5f97d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:01:58 -0800 Subject: libperf cpumap: Rename perf_cpu_map__dummy_new() to perf_cpu_map__new_any_cpu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename perf_cpu_map__dummy_new() to perf_cpu_map__new_any_cpu() to better indicate this is creating a CPU map for the perf_event_open "any" CPU case. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Jajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231129060211.1890454-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf.txt | 2 +- tools/lib/perf/cpumap.c | 4 ++-- tools/lib/perf/evsel.c | 2 +- tools/lib/perf/include/perf/cpumap.h | 4 ++-- tools/lib/perf/libperf.map | 2 +- tools/lib/perf/tests/test-cpumap.c | 2 +- tools/lib/perf/tests/test-evlist.c | 2 +- tools/perf/tests/cpumap.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index a8f1a237931b..a256a26598b0 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -37,7 +37,7 @@ SYNOPSIS struct perf_cpu_map; - struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 2a5a29217374..2bd6aba3d8c9 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -27,7 +27,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) return result; } -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +struct perf_cpu_map *perf_cpu_map__new_any_cpu(void) { struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); @@ -271,7 +271,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) else if (*cpu_list != '\0') cpus = cpu_map__default_new(); else - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); invalid: free(tmp_cpus); out: diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8b51b008a81f..c07160953224 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, static struct perf_cpu_map *empty_cpu_map; if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index e38d859a384d..d0bf218ada11 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -19,9 +19,9 @@ struct perf_cache { struct perf_cpu_map; /** - * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. + * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. */ -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 190b56ae923a..a8ff64baea3e 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -1,7 +1,7 @@ LIBPERF_0.0.1 { global: libperf_init; - perf_cpu_map__dummy_new; + perf_cpu_map__new_any_cpu; perf_cpu_map__default_new; perf_cpu_map__get; perf_cpu_map__put; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 87b0510a556f..2c359bdb951e 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); if (!cpus) return -1; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ed616fc19b4f..ab63878bacb9 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -261,7 +261,7 @@ static int test_mmap_thread(void) threads = perf_thread_map__new_dummy(); __T("failed to create threads", threads); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); __T("failed to create cpus", cpus); perf_thread_map__set_pid(threads, 0, pid); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7730fc2ab40b..bd8e396f3e57 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -213,7 +213,7 @@ static int test__cpu_map_intersect(struct test_suite *test __maybe_unused, static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - struct perf_cpu_map *any = perf_cpu_map__dummy_new(); + struct perf_cpu_map *any = perf_cpu_map__new_any_cpu(); struct perf_cpu_map *one = perf_cpu_map__new("1"); struct perf_cpu_map *two = perf_cpu_map__new("2"); struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 4d7493fa0105..290716783ac6 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -62,7 +62,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } evlist__add(evlist, evsel); - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(getpid()); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 968dddde6dda..d33d0952025c 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -70,7 +70,7 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _ * evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); threads = thread_map__new_by_tid(-1); if (!cpus || !threads) { err = -ENOMEM; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e36da58522ef..ff7f85ded89d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1056,7 +1056,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) return -1; if (target__uses_dummy_map(target)) - cpus = perf_cpu_map__dummy_new(); + cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 532f34d9fcb5..6d7c9c58a9bc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1801,7 +1801,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, if (cpus == NULL) { if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } -- cgit v1.2.3 From f77d795618b92ac6fdb43de0d4036c6ce49f0b82 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Mon, 11 Dec 2023 10:07:33 -0800 Subject: selftests/bpf: Fixes tests for filesystem kfuncs `fs_kfuncs.c`'s `test_xattr` would fail the test even when the filesystem did not support xattr, for instance when /tmp is mounted as tmpfs. This change checks errno when setxattr fail. If the failure is due to the operation being unsupported, we will skip the test (just like we would if verity was not enabled on the FS. Before the change, fs_kfuncs test would fail in test_axattr: $ vmtest -k $(make -s image_name) './tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting [ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=128 to nr_cpu_ ===> Setting up VM ===> Running command [ 4.157491] bpf_testmod: loading out-of-tree module taints kernel. [ 4.161515] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel test_xattr:PASS:create_file 0 nsec test_xattr:FAIL:setxattr unexpected error: -1 (errno 95) #90/1 fs_kfuncs/xattr:FAIL #90/2 fs_kfuncs/fsverity:SKIP #90 fs_kfuncs:FAIL All error logs: test_xattr:PASS:create_file 0 nsec test_xattr:FAIL:setxattr unexpected error: -1 (errno 95) #90/1 fs_kfuncs/xattr:FAIL #90 fs_kfuncs:FAIL Summary: 0/0 PASSED, 1 SKIPPED, 1 FAILED Test plan: $ touch tmpfs_file && truncate -s 1G tmpfs_file && mkfs.ext4 tmpfs_file # /tmp mounted as tmpfs $ vmtest -k $(make -s image_name) './tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting ===> Setting up VM ===> Running command WARNING! Selftests relying on bpf_testmod.ko will be skipped. Can't find bpf_testmod.ko kernel module: -2 #90/1 fs_kfuncs/xattr:SKIP #90/2 fs_kfuncs/fsverity:SKIP #90 fs_kfuncs:SKIP Summary: 1/0 PASSED, 2 SKIPPED, 0 FAILED # /tmp mounted as ext4 with xattr enabled but not verity $ vmtest -k $(make -s image_name) 'mount -o loop tmpfs_file /tmp && \ /tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting ===> Setting up VM ===> Running command [ 4.067071] loop0: detected capacity change from 0 to 2097152 [ 4.191882] EXT4-fs (loop0): mounted filesystem 407ffa36-4553-4c8c-8c78-134443630f69 r/w with ordered data mode. Quota mode: none. WARNING! Selftests relying on bpf_testmod.ko will be skipped. Can't find bpf_testmod.ko kernel module: -2 #90/1 fs_kfuncs/xattr:OK #90/2 fs_kfuncs/fsverity:SKIP #90 fs_kfuncs:OK (SKIP: 1/2) Summary: 1/1 PASSED, 1 SKIPPED, 0 FAILED $ tune2fs -O verity tmpfs_file # /tmp as ext4 with both xattr and verity enabled $ vmtest -k $(make -s image_name) 'mount -o loop tmpfs_file /tmp && \ ./tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting ===> Setting up VM ===> Running command [ 4.291434] loop0: detected capacity change from 0 to 2097152 [ 4.460828] EXT4-fs (loop0): recovery complete [ 4.468631] EXT4-fs (loop0): mounted filesystem 7b4a7b7f-c442-4b06-9ede-254e63cceb52 r/w with ordered data mode. Quota mode: none. [ 4.988074] fs-verity: sha256 using implementation "sha256-generic" WARNING! Selftests relying on bpf_testmod.ko will be skipped. Can't find bpf_testmod.ko kernel module: -2 #90/1 fs_kfuncs/xattr:OK #90/2 fs_kfuncs/fsverity:OK #90 fs_kfuncs:OK Summary: 1/2 PASSED, 0 SKIPPED, 0 FAILED Fixes: 341f06fdddf7 ("selftests/bpf: Add tests for filesystem kfuncs") Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20231211180733.763025-1-chantr4@gmail.com --- tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c index d3196a4b089f..37056ba73847 100644 --- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -25,6 +25,14 @@ static void test_xattr(void) fd = -1; err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + if (!ASSERT_OK(err, "setxattr")) goto out; -- cgit v1.2.3 From 8f60f870a9af53295ab4301da05ca453f115a6b6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:01:59 -0800 Subject: libperf cpumap: Rename perf_cpu_map__default_new() to perf_cpu_map__new_online_cpus() and prefer sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename perf_cpu_map__default_new() to perf_cpu_map__new_online_cpus() to better indicate what the implementation does. Read the online CPUs from /sys/devices/system/cpu/online first before using sysconf() as it can't accurately configure holes in the CPU map. If sysconf() is used, warn when the configured and online processors disagree. When reading from a file, if the read doesn't yield a CPU map then return an empty map rather than the default online. This avoids recursion but also better yields being able to detect failures. Add more comments. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Jajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231129060211.1890454-3-irogers@google.com [ s/syfs/sysfs/g typo ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/cpumap.c | 59 +++++++++++++++++++++--------------- tools/lib/perf/include/perf/cpumap.h | 15 ++++++++- tools/lib/perf/libperf.map | 2 +- tools/lib/perf/tests/test-cpumap.c | 2 +- 4 files changed, 51 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 2bd6aba3d8c9..3aa80d0d26e8 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -9,6 +9,7 @@ #include #include #include +#include "internal.h" void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { @@ -66,15 +67,21 @@ void perf_cpu_map__put(struct perf_cpu_map *map) } } -static struct perf_cpu_map *cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysconf(void) { struct perf_cpu_map *cpus; - int nr_cpus; + int nr_cpus, nr_cpus_conf; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus < 0) return NULL; + nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus != nr_cpus_conf) { + pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.", + nr_cpus, nr_cpus_conf, nr_cpus); + } + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; @@ -86,9 +93,27 @@ static struct perf_cpu_map *cpu_map__default_new(void) return cpus; } -struct perf_cpu_map *perf_cpu_map__default_new(void) +static struct perf_cpu_map *cpu_map__new_sysfs_online(void) { - return cpu_map__default_new(); + struct perf_cpu_map *cpus = NULL; + FILE *onlnf; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (onlnf) { + cpus = perf_cpu_map__read(onlnf); + fclose(onlnf); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__new_online_cpus(void) +{ + struct perf_cpu_map *cpus = cpu_map__new_sysfs_online(); + + if (cpus) + return cpus; + + return cpu_map__new_sysconf(); } @@ -180,27 +205,11 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); out_free_tmp: free(tmp_cpus); return cpus; } -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); - return cpus; -} - struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) { struct perf_cpu_map *cpus = NULL; @@ -211,7 +220,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) int max_entries = 0; if (!cpu_list) - return cpu_map__read_all_cpu_map(); + return perf_cpu_map__new_online_cpus(); /* * must handle the case of empty cpumap to cover @@ -268,9 +277,11 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) if (nr_cpus > 0) cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else + else if (*cpu_list != '\0') { + pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", + cpu_list); + cpus = perf_cpu_map__new_online_cpus(); + } else cpus = perf_cpu_map__new_any_cpu(); invalid: free(tmp_cpus); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index d0bf218ada11..b24bd8b8f34e 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -22,7 +22,20 @@ struct perf_cpu_map; * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value. */ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); +/** + * perf_cpu_map__new_online_cpus - a map read from + * /sys/devices/system/cpu/online if + * available. If reading wasn't possible a map + * is created using the online processors + * assuming the first 'n' processors are all + * online. + */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); +/** + * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no + * cpu_list argument is provided then + * perf_cpu_map__new_online_cpus is returned. + */ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index a8ff64baea3e..8a71f841498e 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -2,7 +2,7 @@ LIBPERF_0.0.1 { global: libperf_init; perf_cpu_map__new_any_cpu; - perf_cpu_map__default_new; + perf_cpu_map__new_online_cpus; perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index 2c359bdb951e..c998b1dae863 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv) perf_cpu_map__put(cpus); perf_cpu_map__put(cpus); - cpus = perf_cpu_map__default_new(); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return -1; -- cgit v1.2.3 From 923ca62a7b1edceaa61eb6ac8dc56fdac51913b8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:02:00 -0800 Subject: libperf cpumap: Rename perf_cpu_map__empty() to perf_cpu_map__has_any_cpu_or_is_empty() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name perf_cpu_map_empty is misleading as true is also returned when the map contains an "any" CPU (aka dummy) map. Rename to perf_cpu_map__has_any_cpu_or_is_empty(), later changes will (re)introduce perf_cpu_map__empty() and perf_cpu_map__has_any_cpu(). Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Jajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231129060211.1890454-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf.txt | 2 +- tools/lib/perf/cpumap.c | 2 +- tools/lib/perf/evlist.c | 4 ++-- tools/lib/perf/include/perf/cpumap.h | 4 ++-- tools/lib/perf/libperf.map | 2 +- tools/perf/arch/arm/util/cs-etm.c | 10 +++++----- tools/perf/arch/arm64/util/arm-spe.c | 4 ++-- tools/perf/arch/x86/util/intel-bts.c | 4 ++-- tools/perf/arch/x86/util/intel-pt.c | 10 +++++----- tools/perf/builtin-c2c.c | 2 +- tools/perf/builtin-stat.c | 6 +++--- tools/perf/util/auxtrace.c | 4 ++-- tools/perf/util/record.c | 2 +- tools/perf/util/stat.c | 2 +- 14 files changed, 29 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index a256a26598b0..fcfb9499ef9c 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -46,7 +46,7 @@ SYNOPSIS void perf_cpu_map__put(struct perf_cpu_map *map); int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); int perf_cpu_map__nr(const struct perf_cpu_map *cpus); - bool perf_cpu_map__empty(const struct perf_cpu_map *map); + bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 3aa80d0d26e8..4adcd7920d03 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -311,7 +311,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus) return cpus ? __perf_cpu_map__nr(cpus) : 1; } -bool perf_cpu_map__empty(const struct perf_cpu_map *map) +bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) { return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; } diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 3acbbccc1901..75f36218fdd9 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -619,7 +619,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) { /* Plus one for each thread */ nr_mmaps += perf_thread_map__nr(evlist->threads); /* Minus the per-thread CPU (-1) */ @@ -653,7 +653,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return mmap_per_thread(evlist, ops, mp); return mmap_per_cpu(evlist, ops, mp); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index b24bd8b8f34e..9cf361fc5edc 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -47,9 +47,9 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); /** - * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. + * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. */ -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 8a71f841498e..10b3f3722642 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -9,7 +9,7 @@ LIBPERF_0.0.1 { perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; - perf_cpu_map__empty; + perf_cpu_map__has_any_cpu_or_is_empty; perf_cpu_map__max; perf_cpu_map__has; perf_thread_map__new_array; diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2cf873d71dff..c6b7b3066324 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -211,7 +211,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, * program can run on any CPUs in this case, thus don't skip * validation. */ - if (!perf_cpu_map__empty(event_cpus) && + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus) && !perf_cpu_map__has(event_cpus, cpu)) continue; @@ -435,7 +435,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * Also the case of per-cpu mmaps, need the contextID in order to be notified * when a context switch happened. */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, "timestamp", 1); evsel__set_config_if_unset(cs_etm_pmu, cs_etm_evsel, @@ -461,7 +461,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(evsel, TIME); err = cs_etm_validate_config(itr, cs_etm_evsel); @@ -539,7 +539,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); /* cpu map is not empty, we have specific CPUs to work with */ - if (!perf_cpu_map__empty(event_cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { for (i = 0; i < cpu__max_cpu().cpu; i++) { struct perf_cpu cpu = { .cpu = i, }; @@ -814,7 +814,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return -EINVAL; /* If the cpu_map is empty all online CPUs are involved */ - if (perf_cpu_map__empty(event_cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { cpu_map = online_cpus; } else { /* Make sure all specified CPUs are online */ diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index e3acc739bd00..51ccbfd3d246 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -232,7 +232,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, sample CPU for AUX event; * also enable the timestamp tracing for samples correlation. */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(arm_spe_evsel, CPU); evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel, "ts_enable", 1); @@ -265,7 +265,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.sample_period = 1; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); evsel__set_sample_bit(tracking_evsel, CPU); diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d2c8cac11470..af8ae4647585 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -143,7 +143,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, if (!opts->full_auxtrace) return 0; - if (opts->full_auxtrace && !perf_cpu_map__empty(cpus)) { + if (opts->full_auxtrace && !perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); return -EINVAL; } @@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_bts_evsel, CPU); } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index fa0c718b9e72..d199619df3ab 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -369,7 +369,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr, ui__warning("Intel Processor Trace: TSC not available\n"); } - per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus); + per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus); auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; @@ -774,7 +774,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Per-cpu recording needs sched_switch events to distinguish different * threads. */ - if (have_timing_info && !perf_cpu_map__empty(cpus) && + if (have_timing_info && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !record_opts__no_switch_events(opts)) { if (perf_can_record_switch_events()) { bool cpu_wide = !target__none(&opts->target) && @@ -832,7 +832,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * In the case of per-cpu mmaps, we need the CPU on the * AUX event. */ - if (!perf_cpu_map__empty(cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) evsel__set_sample_bit(intel_pt_evsel, CPU); } @@ -858,7 +858,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->immediate = true; /* In per-cpu case, always need the time of mmap events etc */ - if (!perf_cpu_map__empty(cpus)) { + if (!perf_cpu_map__has_any_cpu_or_is_empty(cpus)) { evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ evsel__set_sample_bit(tracking_evsel, CPU); @@ -870,7 +870,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * Warn the user when we do not have enough information to decode i.e. * per-cpu with no sched_switch (except workload-only). */ - if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) && + if (!ptr->have_sched_switch && !perf_cpu_map__has_any_cpu_or_is_empty(cpus) && !target__none(&opts->target) && !intel_pt_evsel->core.attr.exclude_user) ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a4cf9de7a7b5..f78eea9e2153 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2320,7 +2320,7 @@ static int setup_nodes(struct perf_session *session) nodes[node] = set; /* empty node, skip */ - if (perf_cpu_map__empty(map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(map)) continue; perf_cpu_map__for_each_cpu(cpu, idx, map) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 0bfa70791cfc..bda020c0b9d5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1316,7 +1316,7 @@ static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map) * be the first online CPU in the cache domain else use the * first online CPU of the cache domain as the ID. */ - if (perf_cpu_map__empty(cpu_map)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpu_map)) id = cpu.cpu; else id = perf_cpu_map__cpu(cpu_map, 0).cpu; @@ -1622,7 +1622,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. */ - if (!perf_cpu_map__empty(evsel_list->core.user_requested_cpus)) + if (!perf_cpu_map__has_any_cpu_or_is_empty(evsel_list->core.user_requested_cpus)) nr = perf_cpu_map__max(evsel_list->core.user_requested_cpus).cpu; else nr = 0; @@ -2289,7 +2289,7 @@ int process_stat_config_event(struct perf_session *session, perf_event__read_stat_config(&stat_config, &event->stat_config); - if (perf_cpu_map__empty(st->cpus)) { + if (perf_cpu_map__has_any_cpu_or_is_empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); } else if (st->aggr_mode != AGGR_UNSET) { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index f528c4364d23..3684e6009b63 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); mp->mmap_needed = evsel->needs_auxtrace_mmap; @@ -648,7 +648,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus); if (per_cpu_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9eb5c6a08999..40290382b2d7 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -237,7 +237,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { + if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); if (cpus) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ec3506042217..012c4946b9c4 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!counter->per_pkg) return 0; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__has_any_cpu_or_is_empty(cpus)) return 0; if (!mask) { -- cgit v1.2.3 From effe957c6bb70cac12918c0f5fd4cefb35967618 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:02:01 -0800 Subject: libperf cpumap: Replace usage of perf_cpu_map__new(NULL) with perf_cpu_map__new_online_cpus() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing NULL to perf_cpu_map__new() performs perf_cpu_map__new_online_cpus(), just directly call perf_cpu_map__new_online_cpus() to be more intention revealing. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Jajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231129060211.1890454-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/examples/sampling.c | 2 +- tools/lib/perf/Documentation/libperf-sampling.txt | 2 +- tools/lib/perf/evlist.c | 2 +- tools/lib/perf/tests/test-evlist.c | 4 ++-- tools/lib/perf/tests/test-evsel.c | 2 +- tools/perf/arch/arm/util/cs-etm.c | 6 +++--- tools/perf/arch/arm64/util/header.c | 2 +- tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/bench/futex-hash.c | 2 +- tools/perf/bench/futex-lock-pi.c | 2 +- tools/perf/bench/futex-requeue.c | 2 +- tools/perf/bench/futex-wake-parallel.c | 2 +- tools/perf/bench/futex-wake.c | 2 +- tools/perf/builtin-ftrace.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-all-cpus.c | 2 +- tools/perf/tests/perf-time-to-tsc.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/topology.c | 2 +- tools/perf/util/bpf_counter.c | 2 +- tools/perf/util/cpumap.c | 2 +- tools/perf/util/cputopo.c | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/perf_api_probe.c | 4 ++-- tools/perf/util/record.c | 2 +- 28 files changed, 32 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c index 8e1a926a9cfe..bc142f0664b5 100644 --- a/tools/lib/perf/Documentation/examples/sampling.c +++ b/tools/lib/perf/Documentation/examples/sampling.c @@ -39,7 +39,7 @@ int main(int argc, char **argv) libperf_init(libperf_print); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { fprintf(stderr, "failed to create cpus\n"); return -1; diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt index d6ca24f6ef78..2378980fab8a 100644 --- a/tools/lib/perf/Documentation/libperf-sampling.txt +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs: [source,c] -- - 42 cpus = perf_cpu_map__new(NULL); + 42 cpus = perf_cpu_map__new_online_cpus(); 43 if (!cpus) { 44 fprintf(stderr, "failed to create cpus\n"); 45 return -1; diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 75f36218fdd9..058e3ff10f9b 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (evsel->system_wide) { /* System wide: set the cpu map of the evsel to all online CPUs. */ perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__new(NULL); + evsel->cpus = perf_cpu_map__new_online_cpus(); } else if (evlist->has_user_cpus && evsel->is_pmu_core) { /* * User requested CPUs on a core PMU, ensure the requested CPUs diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ab63878bacb9..10f70cb41ff1 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -46,7 +46,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); @@ -350,7 +350,7 @@ static int test_mmap_cpus(void) attr.config = id; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evlist = perf_evlist__new(); diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a11fc51bfb68..545ec3150546 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -27,7 +27,7 @@ static int test_stat_cpu(void) }; int err, idx; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); __T("failed to create cpus", cpus); evsel = perf_evsel__new(&attr); diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c6b7b3066324..77e6663c1703 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -199,7 +199,7 @@ static int cs_etm_validate_config(struct auxtrace_record *itr, { int i, err = -EINVAL; struct perf_cpu_map *event_cpus = evsel->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* Set option of each CPU we have */ for (i = 0; i < cpu__max_cpu().cpu; i++) { @@ -536,7 +536,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, int i; int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__has_any_cpu_or_is_empty(event_cpus)) { @@ -802,7 +802,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, u64 nr_cpu, type; struct perf_cpu_map *cpu_map; struct perf_cpu_map *event_cpus = session->evlist->core.user_requested_cpus; - struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index a2eef9ec5491..97037499152e 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -57,7 +57,7 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) int get_cpuid(char *buf, size_t sz) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); int ret; if (!cpus) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 6bfffe83dde9..d3db73dac66a 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index cb5174b53940..06bb3187660a 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -444,7 +444,7 @@ int bench_epoll_wait(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2005a3fa3026..0c69d20efa32 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -138,7 +138,7 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 092cbd52db82..7a4973346180 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -172,7 +172,7 @@ int bench_futex_lock_pi(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index c0035990a33c..d9ad736c1a3e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -174,7 +174,7 @@ int bench_futex_requeue(int argc, const char **argv) if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 5ab0234d74e6..b66df553e561 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -264,7 +264,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "mlockall"); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 18a5894af8bb..690fd6d3da13 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -149,7 +149,7 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index ac2e6c75f912..eb30c8eca488 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -333,7 +333,7 @@ static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace) static int reset_tracing_cpu(void) { - struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpumap = perf_cpu_map__new_online_cpus(); int ret; ret = set_tracing_cpumask(cpumap); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 8620146d0378..7a3a7bbbec71 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -610,7 +610,7 @@ static int do_test_code_reading(bool try_kcore) goto out_put; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed\n"); goto out_put; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 8f4f9b632e1e..5a3b2bed07f3 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -81,7 +81,7 @@ static int test__keep_tracking(struct test_suite *test __maybe_unused, int subte threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 886a13a77a16..012c8ae439fd 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -52,7 +52,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_free_threads; diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index f3275be83a33..fb114118c876 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -37,7 +37,7 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb return -1; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (cpus == NULL) { pr_debug("perf_cpu_map__new\n"); goto out_thread_map_delete; diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index efcd71c2738a..bbe2ddeb9b74 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -93,7 +93,7 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su threads = thread_map__new(-1, getpid(), UINT_MAX); CHECK_NOT_NULL__(threads); - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); CHECK_NOT_NULL__(cpus); evlist = evlist__new(); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e52b031bedc5..5cab17a1942e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -351,7 +351,7 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub goto out_err; } - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) { pr_debug("perf_cpu_map__new failed!\n"); goto out_err; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 9dee63734e66..2a842f53fbb5 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -215,7 +215,7 @@ static int test__session_topology(struct test_suite *test __maybe_unused, int su if (session_write_header(path)) goto free_path; - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); goto free_path; diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7f9b0e46e008..7a8af60e0f51 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -455,7 +455,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) return -1; if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new(NULL); + all_cpu_map = perf_cpu_map__new_online_cpus(); if (!all_cpu_map) return -1; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0e090e8bc334..0581ee0fa5f2 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -672,7 +672,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ static struct perf_cpu_map *online; if (!online) - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ return online; } diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 81cfc85f4668..8bbeb2dc76fd 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void) ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ - map = perf_cpu_map__new(NULL); + map = perf_cpu_map__new_online_cpus(); if (map == NULL) { pr_debug("failed to get system cpumap\n"); return NULL; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ff7f85ded89d..0ed3ce2aa8eb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1352,7 +1352,7 @@ static int evlist__create_syswide_maps(struct evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) goto out; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index e1e2d701599c..1de3b69cdf4a 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) struct perf_cpu cpu; int ret, i = 0; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; cpu = perf_cpu_map__cpu(cpus, 0); @@ -140,7 +140,7 @@ bool perf_can_record_cpu_wide(void) struct perf_cpu cpu; int fd; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 40290382b2d7..87e817b3cf7e 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -238,7 +238,7 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); if (!evlist || perf_cpu_map__has_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); if (cpus) cpu = perf_cpu_map__cpu(cpus, 0); -- cgit v1.2.3 From 5805c82513c444333efb086017be8d666336858a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:02:02 -0800 Subject: libperf cpumap: Add for_each_cpu() that skips the "any CPU" case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When iterating CPUs in a CPU map it is often desirable to skip the "any CPU" (aka dummy) case. Add a helper for this and use in builtin-record. Reviewed-by: James Clark Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Jajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Cc: bpf@vger.kernel.org Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20231129060211.1890454-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 6 ++++++ tools/perf/builtin-record.c | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 9cf361fc5edc..dbe0a7352b64 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -64,6 +64,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map); (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \ + for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \ + (idx) < perf_cpu_map__nr(cpus); \ + (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \ + if ((_cpu).cpu != -1) + #define perf_cpu_map__for_each_idx(idx, cpus) \ for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index eb5a398ddb1d..8a1002683fda 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3601,9 +3601,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp if (cpu_map__is_dummy(cpus)) return 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - if (cpu.cpu == -1) - continue; + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpus) { /* Return ENODEV is input cpu is greater than max cpu */ if ((unsigned long)cpu.cpu > mask->nbits) return -ENODEV; -- cgit v1.2.3 From bf857ddd21d0bffc1edafc317e8e2ce0d6d5950c Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:20 -0400 Subject: maple_tree: move debug check to __mas_set_range() __mas_set_range() was created to shortcut resetting the maple state and a debug check was added to the caller (the vma iterator) to ensure the internal maple state remains safe to use. Move the debug check from the vma iterator into the maple tree itself so other users do not incorrectly use the advanced maple state modification. Fallout from this change include a large amount of debug setup needed to be moved to earlier in the header, and the maple_tree.h radix-tree test code needed to move the inclusion of the header to after the atomic define. None of those changes have functional changes. Link: https://lkml.kernel.org/r/20231101171629.3612299-4-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 255 ++++++++++++++-------------- mm/internal.h | 2 - tools/testing/radix-tree/linux/maple_tree.h | 2 +- 3 files changed, 130 insertions(+), 129 deletions(-) (limited to 'tools') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a452dd8a1e5c..b5d5992578c9 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -557,6 +557,131 @@ static inline void mas_reset(struct ma_state *mas) */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) + +#ifdef CONFIG_DEBUG_MAPLE_TREE +enum mt_dump_format { + mt_dump_dec, + mt_dump_hex, +}; + +extern atomic_t maple_tree_tests_run; +extern atomic_t maple_tree_tests_passed; + +void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); +void mas_dump(const struct ma_state *mas); +void mas_wr_dump(const struct ma_wr_state *wr_mas); +void mt_validate(struct maple_tree *mt); +void mt_cache_shrink(void); +#define MT_BUG_ON(__tree, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_BUG_ON(__mas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MAS_WR_BUG_ON(__wrmas, __x) do { \ + atomic_inc(&maple_tree_tests_run); \ + if (__x) { \ + pr_info("BUG at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ +} while (0) + +#define MT_WARN_ON(__tree, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mt_dump(__tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WARN_ON(__mas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_dump(__mas); \ + mt_dump((__mas)->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) + +#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ + int ret = !!(__x); \ + atomic_inc(&maple_tree_tests_run); \ + if (ret) { \ + pr_info("WARN at %s:%d (%u)\n", \ + __func__, __LINE__, __x); \ + mas_wr_dump(__wrmas); \ + mas_dump((__wrmas)->mas); \ + mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ + pr_info("Pass: %u Run:%u\n", \ + atomic_read(&maple_tree_tests_passed), \ + atomic_read(&maple_tree_tests_run)); \ + dump_stack(); \ + } else { \ + atomic_inc(&maple_tree_tests_passed); \ + } \ + unlikely(ret); \ +}) +#else +#define MT_BUG_ON(__tree, __x) BUG_ON(__x) +#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) +#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) +#define MT_WARN_ON(__tree, __x) WARN_ON(__x) +#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) +#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) +#endif /* CONFIG_DEBUG_MAPLE_TREE */ + /** * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the * current location. @@ -570,6 +695,9 @@ static inline void mas_reset(struct ma_state *mas) static inline void __mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { + /* Ensure the range starts within the current slot */ + MAS_WARN_ON(mas, mas_is_active(mas) && + (mas->index > start || mas->last < start)); mas->index = start; mas->last = last; } @@ -587,8 +715,8 @@ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { - __mas_set_range(mas, start, last); mas->node = MAS_START; + __mas_set_range(mas, start, last); } /** @@ -713,129 +841,4 @@ void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max); for (__entry = mt_find(__tree, &(__index), __max); \ __entry; __entry = mt_find_after(__tree, &(__index), __max)) - -#ifdef CONFIG_DEBUG_MAPLE_TREE -enum mt_dump_format { - mt_dump_dec, - mt_dump_hex, -}; - -extern atomic_t maple_tree_tests_run; -extern atomic_t maple_tree_tests_passed; - -void mt_dump(const struct maple_tree *mt, enum mt_dump_format format); -void mas_dump(const struct ma_state *mas); -void mas_wr_dump(const struct ma_wr_state *wr_mas); -void mt_validate(struct maple_tree *mt); -void mt_cache_shrink(void); -#define MT_BUG_ON(__tree, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_BUG_ON(__mas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MAS_WR_BUG_ON(__wrmas, __x) do { \ - atomic_inc(&maple_tree_tests_run); \ - if (__x) { \ - pr_info("BUG at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ -} while (0) - -#define MT_WARN_ON(__tree, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mt_dump(__tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WARN_ON(__mas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_dump(__mas); \ - mt_dump((__mas)->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) - -#define MAS_WR_WARN_ON(__wrmas, __x) ({ \ - int ret = !!(__x); \ - atomic_inc(&maple_tree_tests_run); \ - if (ret) { \ - pr_info("WARN at %s:%d (%u)\n", \ - __func__, __LINE__, __x); \ - mas_wr_dump(__wrmas); \ - mas_dump((__wrmas)->mas); \ - mt_dump((__wrmas)->mas->tree, mt_dump_hex); \ - pr_info("Pass: %u Run:%u\n", \ - atomic_read(&maple_tree_tests_passed), \ - atomic_read(&maple_tree_tests_run)); \ - dump_stack(); \ - } else { \ - atomic_inc(&maple_tree_tests_passed); \ - } \ - unlikely(ret); \ -}) -#else -#define MT_BUG_ON(__tree, __x) BUG_ON(__x) -#define MAS_BUG_ON(__mas, __x) BUG_ON(__x) -#define MAS_WR_BUG_ON(__mas, __x) BUG_ON(__x) -#define MT_WARN_ON(__tree, __x) WARN_ON(__x) -#define MAS_WARN_ON(__mas, __x) WARN_ON(__x) -#define MAS_WR_WARN_ON(__mas, __x) WARN_ON(__x) -#endif /* CONFIG_DEBUG_MAPLE_TREE */ - #endif /*_LINUX_MAPLE_TREE_H */ diff --git a/mm/internal.h b/mm/internal.h index 2bc9ff8db393..0005b8adbd5c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1135,8 +1135,6 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) static inline void vma_iter_config(struct vma_iterator *vmi, unsigned long index, unsigned long last) { - MAS_BUG_ON(&vmi->mas, vmi->mas.node != MAS_START && - (vmi->mas.index > index || vmi->mas.last < index)); __mas_set_range(&vmi->mas, index, last - 1); } diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h index 7d8d1f445b89..06c89bdcc515 100644 --- a/tools/testing/radix-tree/linux/maple_tree.h +++ b/tools/testing/radix-tree/linux/maple_tree.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0+ */ #define atomic_t int32_t -#include "../../../../include/linux/maple_tree.h" #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) #define atomic_set(x, y) do {} while (0) #define U8_MAX UCHAR_MAX +#include "../../../../include/linux/maple_tree.h" -- cgit v1.2.3 From 31c532a8af57513228c2b12d281104198ff412b8 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:21 -0400 Subject: maple_tree: add end of node tracking to the maple state Analysis of the mas_for_each() iteration showed that there is a significant time spent finding the end of a node. This time can be greatly reduced if the end of the node is cached in the maple state. Care must be taken to update & invalidate as necessary. Link: https://lkml.kernel.org/r/20231101171629.3612299-5-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 1 + lib/maple_tree.c | 7 +++++++ tools/testing/radix-tree/maple.c | 1 + 3 files changed, 9 insertions(+) (limited to 'tools') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index b5d5992578c9..0b82efe0cf1e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -393,6 +393,7 @@ struct ma_state { unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; + unsigned char end; /* The end of the node */ }; struct ma_wr_state { diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 8d379d34ea0a..ea0a36341fed 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2841,6 +2841,7 @@ next: goto dead_node; } while (!ma_is_leaf(type)); + mas->end = end; mas->offset = offset; mas->index = min; mas->last = max; @@ -3507,6 +3508,7 @@ static noinline_for_kasan int mas_commit_b_node(struct ma_wr_state *wr_mas, mas_replace_node(wr_mas->mas, old_enode); reuse_node: mas_update_gap(wr_mas->mas); + wr_mas->mas->end = b_end; return 1; } @@ -4010,6 +4012,7 @@ done: } trace_ma_write(__func__, mas, 0, wr_mas->entry); mas_update_gap(mas); + mas->end = new_end; return true; } @@ -4190,6 +4193,7 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas, if (!wr_mas->content || !wr_mas->entry) mas_update_gap(mas); + mas->end = new_end; trace_ma_write(__func__, mas, new_end, wr_mas->entry); return true; } @@ -4428,6 +4432,7 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min) if (unlikely(mte_dead_node(mas->node))) return 1; + mas->end = mas->offset; return 0; no_entry: @@ -5074,6 +5079,7 @@ int mas_empty_area(struct ma_state *mas, unsigned long min, if (mas->index < min) mas->index = min; mas->last = mas->index + size - 1; + mas->end = mas_data_end(mas); return 0; } EXPORT_SYMBOL_GPL(mas_empty_area); @@ -5134,6 +5140,7 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min, mas->last = max; mas->index = mas->last - size + 1; + mas->end = mas_data_end(mas); return 0; } EXPORT_SYMBOL_GPL(mas_empty_area_rev); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index cb5358674521..7095fb0ec026 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -945,6 +945,7 @@ retry: goto retry; } + mas->end = mas_data_end(mas); return ret; not_found: -- cgit v1.2.3 From 067311d33e650adfe7ae23765959ddcc1ba18510 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:25 -0400 Subject: maple_tree: separate ma_state node from status The maple tree node is overloaded to keep status as well as the active node. This, unfortunately, results in a re-walk on underflow or overflow. Since the maple state has room, the status can be placed in its own enum in the structure. Once an underflow/overflow is detected, certain modes can restore the status to active and others may need to re-walk just that one node to see the entry. The status being an enum has the benefit of detecting unhandled status in switch statements. [Liam.Howlett@oracle.com: fix comments about MAS_*] Link: https://lkml.kernel.org/r/20231106154124.614247-1-Liam.Howlett@oracle.com [Liam.Howlett@oracle.com: update forking to separate maple state and node] Link: https://lkml.kernel.org/r/20231106154551.615042-1-Liam.Howlett@oracle.com [Liam.Howlett@oracle.com: fix mas_prev() state separation code] Link: https://lkml.kernel.org/r/20231207193319.4025462-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20231101171629.3612299-9-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 87 ++++---- include/linux/mm_types.h | 3 +- lib/maple_tree.c | 459 +++++++++++++++++++++++---------------- lib/test_maple_tree.c | 189 ++++++++-------- mm/internal.h | 8 +- tools/testing/radix-tree/maple.c | 26 ++- 6 files changed, 445 insertions(+), 327 deletions(-) (limited to 'tools') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 0b82efe0cf1e..4dd668f7b111 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -349,6 +349,36 @@ static inline bool mtree_empty(const struct maple_tree *mt) /* Advanced API */ +/* + * Maple State Status + * ma_active means the maple state is pointing to a node and offset and can + * continue operating on the tree. + * ma_start means we have not searched the tree. + * ma_root means we have searched the tree and the entry we found lives in + * the root of the tree (ie it has index 0, length 1 and is the only entry in + * the tree). + * ma_none means we have searched the tree and there is no node in the + * tree for this entry. For example, we searched for index 1 in an empty + * tree. Or we have a tree which points to a full leaf node and we + * searched for an entry which is larger than can be contained in that + * leaf node. + * ma_pause means the data within the maple state may be stale, restart the + * operation + * ma_overflow means the search has reached the upper limit of the search + * ma_underflow means the search has reached the lower limit of the search + * ma_error means there was an error, check the node for the error number. + */ +enum maple_status { + ma_active, + ma_start, + ma_root, + ma_none, + ma_pause, + ma_overflow, + ma_underflow, + ma_error, +}; + /* * The maple state is defined in the struct ma_state and is used to keep track * of information during operations, and even between operations when using the @@ -381,6 +411,13 @@ static inline bool mtree_empty(const struct maple_tree *mt) * When returning a value the maple state index and last respectively contain * the start and end of the range for the entry. Ranges are inclusive in the * Maple Tree. + * + * The status of the state is used to determine how the next action should treat + * the state. For instance, if the status is ma_start then the next action + * should start at the root of the tree and walk down. If the status is + * ma_pause then the node may be stale data and should be discarded. If the + * status is ma_overflow, then the last action hit the upper limit. + * */ struct ma_state { struct maple_tree *tree; /* The tree we're operating in */ @@ -390,6 +427,7 @@ struct ma_state { unsigned long min; /* The minimum index of this node - implied pivot min */ unsigned long max; /* The maximum index of this node - implied pivot max */ struct maple_alloc *alloc; /* Allocated nodes for this operation */ + enum maple_status status; /* The status of the state (active, start, none, etc) */ unsigned char depth; /* depth of tree descent during write */ unsigned char offset; unsigned char mas_flags; @@ -416,28 +454,12 @@ struct ma_wr_state { spin_lock_nested(&((mas)->tree->ma_lock), subclass) #define mas_unlock(mas) spin_unlock(&((mas)->tree->ma_lock)) - /* * Special values for ma_state.node. - * MAS_START means we have not searched the tree. - * MAS_ROOT means we have searched the tree and the entry we found lives in - * the root of the tree (ie it has index 0, length 1 and is the only entry in - * the tree). - * MAS_NONE means we have searched the tree and there is no node in the - * tree for this entry. For example, we searched for index 1 in an empty - * tree. Or we have a tree which points to a full leaf node and we - * searched for an entry which is larger than can be contained in that - * leaf node. * MA_ERROR represents an errno. After dropping the lock and attempting * to resolve the error, the walk would have to be restarted from the * top of the tree as the tree may have been modified. */ -#define MAS_START ((struct maple_enode *)1UL) -#define MAS_ROOT ((struct maple_enode *)5UL) -#define MAS_NONE ((struct maple_enode *)9UL) -#define MAS_PAUSE ((struct maple_enode *)17UL) -#define MAS_OVERFLOW ((struct maple_enode *)33UL) -#define MAS_UNDERFLOW ((struct maple_enode *)65UL) #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) @@ -446,7 +468,8 @@ struct ma_wr_state { .tree = mt, \ .index = first, \ .last = end, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ @@ -477,7 +500,6 @@ void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); -bool mas_is_err(struct ma_state *mas); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); @@ -506,28 +528,18 @@ static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, mas->tree = tree; mas->index = mas->last = addr; mas->max = ULONG_MAX; - mas->node = MAS_START; + mas->status = ma_start; + mas->node = NULL; } -/* Checks if a mas has not found anything */ -static inline bool mas_is_none(const struct ma_state *mas) -{ - return mas->node == MAS_NONE; -} - -/* Checks if a mas has been paused */ -static inline bool mas_is_paused(const struct ma_state *mas) +static inline bool mas_is_active(struct ma_state *mas) { - return mas->node == MAS_PAUSE; + return mas->status == ma_active; } -/* Check if the mas is pointing to a node or not */ -static inline bool mas_is_active(struct ma_state *mas) +static inline bool mas_is_err(struct ma_state *mas) { - if ((unsigned long)mas->node >= MAPLE_RESERVED_RANGE) - return true; - - return false; + return mas->status == ma_error; } /** @@ -540,9 +552,10 @@ static inline bool mas_is_active(struct ma_state *mas) * * Context: Any context. */ -static inline void mas_reset(struct ma_state *mas) +static __always_inline void mas_reset(struct ma_state *mas) { - mas->node = MAS_START; + mas->status = ma_start; + mas->node = NULL; } /** @@ -716,7 +729,7 @@ static inline void __mas_set_range(struct ma_state *mas, unsigned long start, static inline void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last) { - mas->node = MAS_START; + mas_reset(mas); __mas_set_range(mas, start, last); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ef18d2b25378..a66534c78c4d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1071,7 +1071,8 @@ struct vma_iterator { .mas = { \ .tree = &(__mm)->mm_mt, \ .index = __addr, \ - .node = MAS_START, \ + .node = NULL, \ + .status = ma_start, \ }, \ } diff --git a/lib/maple_tree.c b/lib/maple_tree.c index f0d2aea91351..187a9796188e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -249,40 +249,40 @@ static __always_inline bool mt_is_reserved(const void *entry) xa_is_internal(entry); } -static inline void mas_set_err(struct ma_state *mas, long err) +static __always_inline void mas_set_err(struct ma_state *mas, long err) { mas->node = MA_ERROR(err); + mas->status = ma_error; } -static inline bool mas_is_ptr(const struct ma_state *mas) +static __always_inline bool mas_is_ptr(const struct ma_state *mas) { - return mas->node == MAS_ROOT; + return mas->status == ma_root; } -static inline bool mas_is_start(const struct ma_state *mas) +static __always_inline bool mas_is_start(const struct ma_state *mas) { - return mas->node == MAS_START; + return mas->status == ma_start; } -bool mas_is_err(struct ma_state *mas) +static __always_inline bool mas_is_none(const struct ma_state *mas) { - return xa_is_err(mas->node); + return mas->status == ma_none; } -static __always_inline bool mas_is_overflow(struct ma_state *mas) +static __always_inline bool mas_is_paused(const struct ma_state *mas) { - if (unlikely(mas->node == MAS_OVERFLOW)) - return true; - - return false; + return mas->status == ma_pause; } -static __always_inline bool mas_is_underflow(struct ma_state *mas) +static __always_inline bool mas_is_overflow(struct ma_state *mas) { - if (unlikely(mas->node == MAS_UNDERFLOW)) - return true; + return mas->status == ma_overflow; +} - return false; +static inline bool mas_is_underflow(struct ma_state *mas) +{ + return mas->status == ma_underflow; } static inline bool mas_searchable(struct ma_state *mas) @@ -1274,6 +1274,7 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) if (mas->mas_flags & MA_STATE_PREALLOC) { if (allocated) return; + BUG_ON(!allocated); WARN_ON(!allocated); } @@ -1379,14 +1380,14 @@ static void mas_node_count(struct ma_state *mas, int count) * mas_start() - Sets up maple state for operations. * @mas: The maple state. * - * If mas->node == MAS_START, then set the min, max and depth to + * If mas->status == mas_start, then set the min, max and depth to * defaults. * * Return: - * - If mas->node is an error or not MAS_START, return NULL. - * - If it's an empty tree: NULL & mas->node == MAS_NONE - * - If it's a single entry: The entry & mas->node == MAS_ROOT - * - If it's a tree: NULL & mas->node == safe root node. + * - If mas->node is an error or not mas_start, return NULL. + * - If it's an empty tree: NULL & mas->status == ma_none + * - If it's a single entry: The entry & mas->status == mas_root + * - If it's a tree: NULL & mas->status == safe root node. */ static inline struct maple_enode *mas_start(struct ma_state *mas) { @@ -1402,6 +1403,7 @@ retry: /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 1; + mas->status = ma_active; mas->node = mte_safe_root(root); mas->offset = 0; if (mte_dead_node(mas->node)) @@ -1412,13 +1414,14 @@ retry: /* empty tree */ if (unlikely(!root)) { - mas->node = MAS_NONE; + mas->node = NULL; + mas->status = ma_none; mas->offset = MAPLE_NODE_SLOTS; return NULL; } /* Single entry tree */ - mas->node = MAS_ROOT; + mas->status = ma_root; mas->offset = MAPLE_NODE_SLOTS; /* Single entry tree. */ @@ -2225,19 +2228,21 @@ static inline bool mas_next_sibling(struct ma_state *mas) } /* - * mte_node_or_node() - Return the encoded node or MAS_NONE. + * mte_node_or_none() - Set the enode and state. * @enode: The encoded maple node. * - * Shorthand to avoid setting %NULLs in the tree or maple_subtree_state. - * - * Return: @enode or MAS_NONE + * Set the node to the enode and the status. */ -static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode) +static inline void mas_node_or_none(struct ma_state *mas, + struct maple_enode *enode) { - if (enode) - return enode; - - return ma_enode_ptr(MAS_NONE); + if (enode) { + mas->node = enode; + mas->status = ma_active; + } else { + mas->node = NULL; + mas->status = ma_none; + } } /* @@ -2557,13 +2562,15 @@ static inline void mast_set_split_parents(struct maple_subtree_state *mast, * The node will either be RCU freed or pushed back on the maple state. */ static inline void mas_topiary_node(struct ma_state *mas, - struct maple_enode *enode, bool in_rcu) + struct ma_state *tmp_mas, bool in_rcu) { struct maple_node *tmp; + struct maple_enode *enode; - if (enode == MAS_NONE) + if (mas_is_none(tmp_mas)) return; + enode = tmp_mas->node; tmp = mte_to_node(enode); mte_set_node_dead(enode); if (in_rcu) @@ -2603,8 +2610,8 @@ static inline void mas_topiary_replace(struct ma_state *mas, /* Update the parent pointers in the tree */ tmp[0] = *mas; tmp[0].offset = 0; - tmp[1].node = MAS_NONE; - tmp[2].node = MAS_NONE; + tmp[1].status = ma_none; + tmp[2].status = ma_none; while (!mte_is_leaf(tmp[0].node)) { n = 0; for (i = 0; i < 3; i++) { @@ -2624,7 +2631,7 @@ static inline void mas_topiary_replace(struct ma_state *mas, break; while (n < 3) - tmp_next[n++].node = MAS_NONE; + tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) tmp[i] = tmp_next[i]; @@ -2637,8 +2644,8 @@ static inline void mas_topiary_replace(struct ma_state *mas, tmp[0] = *mas; tmp[0].offset = 0; tmp[0].node = old_enode; - tmp[1].node = MAS_NONE; - tmp[2].node = MAS_NONE; + tmp[1].status = ma_none; + tmp[2].status = ma_none; in_rcu = mt_in_rcu(mas->tree); do { n = 0; @@ -2653,7 +2660,7 @@ static inline void mas_topiary_replace(struct ma_state *mas, if ((tmp_next[n].min >= tmp_next->index) && (tmp_next[n].max <= tmp_next->last)) { mat_add(&subtrees, tmp_next[n].node); - tmp_next[n].node = MAS_NONE; + tmp_next[n].status = ma_none; } else { n++; } @@ -2664,16 +2671,16 @@ static inline void mas_topiary_replace(struct ma_state *mas, break; while (n < 3) - tmp_next[n++].node = MAS_NONE; + tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) { - mas_topiary_node(mas, tmp[i].node, in_rcu); + mas_topiary_node(mas, &tmp[i], in_rcu); tmp[i] = tmp_next[i]; } } while (!mte_is_leaf(tmp[0].node)); for (i = 0; i < 3; i++) - mas_topiary_node(mas, tmp[i].node, in_rcu); + mas_topiary_node(mas, &tmp[i], in_rcu); mas_mat_destroy(mas, &subtrees); } @@ -2712,9 +2719,9 @@ static inline void mast_cp_to_nodes(struct maple_subtree_state *mast, { bool new_lmax = true; - mast->l->node = mte_node_or_none(left); - mast->m->node = mte_node_or_none(middle); - mast->r->node = mte_node_or_none(right); + mas_node_or_none(mast->l, left); + mas_node_or_none(mast->m, middle); + mas_node_or_none(mast->r, right); mast->l->min = mast->orig_l->min; if (split == mast->bn->b_end) { @@ -2894,7 +2901,7 @@ static int mas_spanning_rebalance(struct ma_state *mas, mast->l = &l_mas; mast->m = &m_mas; mast->r = &r_mas; - l_mas.node = r_mas.node = m_mas.node = MAS_NONE; + l_mas.status = r_mas.status = m_mas.status = ma_none; /* Check if this is not root and has sufficient data. */ if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) && @@ -3421,7 +3428,6 @@ static int mas_split(struct ma_state *mas, struct maple_big_node *b_node) /* Try to push left. */ if (mas_push_data(mas, height, &mast, true)) break; - /* Try to push right. */ if (mas_push_data(mas, height, &mast, false)) break; @@ -3537,6 +3543,7 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry) slots = ma_slots(node, type); node->parent = ma_parent_ptr(mas_tree_parent(mas)); mas->node = mt_mk_node(node, type); + mas->status = ma_active; if (mas->index) { if (contents) { @@ -3569,7 +3576,7 @@ static inline void mas_store_root(struct ma_state *mas, void *entry) mas_root_expand(mas, entry); else { rcu_assign_pointer(mas->tree->ma_root, entry); - mas->node = MAS_START; + mas->status = ma_start; } } @@ -3801,7 +3808,7 @@ static inline int mas_new_root(struct ma_state *mas, void *entry) mas->depth = 0; mas_set_height(mas); rcu_assign_pointer(mas->tree->ma_root, entry); - mas->node = MAS_START; + mas->status = ma_start; goto done; } @@ -3814,6 +3821,7 @@ static inline int mas_new_root(struct ma_state *mas, void *entry) slots = ma_slots(node, type); node->parent = ma_parent_ptr(mas_tree_parent(mas)); mas->node = mt_mk_node(node, type); + mas->status = ma_active; rcu_assign_pointer(slots[0], entry); pivots[0] = mas->last; mas->depth = 1; @@ -4367,11 +4375,13 @@ static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas, /* * mas_prev_node() - Find the prev non-null entry at the same level in the - * tree. The prev value will be mas->node[mas->offset] or MAS_NONE. + * tree. The prev value will be mas->node[mas->offset] or the status will be + * ma_none. * @mas: The maple state * @min: The lower limit to search * - * The prev node value will be mas->node[mas->offset] or MAS_NONE. + * The prev node value will be mas->node[mas->offset] or the status will be + * ma_none. * Return: 1 if the node is dead, 0 otherwise. */ static int mas_prev_node(struct ma_state *mas, unsigned long min) @@ -4441,7 +4451,7 @@ no_entry: if (unlikely(ma_dead_node(node))) return 1; - mas->node = MAS_NONE; + mas->status = ma_underflow; return 0; } @@ -4455,8 +4465,7 @@ no_entry: * * Return: The entry in the previous slot which is possibly NULL */ -static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty, - bool set_underflow) +static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty) { void *entry; void __rcu **slots; @@ -4489,13 +4498,16 @@ again: mas->last = mas->index - 1; mas->index = mas_safe_min(mas, pivots, mas->offset); } else { + if (mas->index <= min) + goto underflow; + if (mas_prev_node(mas, min)) { mas_rewalk(mas, save_point); goto retry; } - if (mas_is_none(mas)) - goto underflow; + if (WARN_ON_ONCE(mas_is_underflow(mas))) + return NULL; mas->last = mas->max; node = mas_mn(mas); @@ -4509,12 +4521,15 @@ again: if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; + if (likely(entry)) return entry; if (!empty) { - if (mas->index <= min) - goto underflow; + if (mas->index <= min) { + mas->status = ma_underflow; + return NULL; + } goto again; } @@ -4522,8 +4537,7 @@ again: return entry; underflow: - if (set_underflow) - mas->node = MAS_UNDERFLOW; + mas->status = ma_underflow; return NULL; } @@ -4532,7 +4546,8 @@ underflow: * @mas: The maple state * @max: The maximum pivot value to check. * - * The next value will be mas->node[mas->offset] or MAS_NONE. + * The next value will be mas->node[mas->offset] or the status will have + * overflowed. * Return: 1 on dead node, 0 otherwise. */ static int mas_next_node(struct ma_state *mas, struct maple_node *node, @@ -4548,13 +4563,13 @@ static int mas_next_node(struct ma_state *mas, struct maple_node *node, void __rcu **slots; if (mas->max >= max) - goto no_entry; + goto overflow; min = mas->max + 1; level = 0; do { if (ma_is_root(node)) - goto no_entry; + goto overflow; /* Walk up. */ if (unlikely(mas_ascend(mas))) @@ -4605,11 +4620,11 @@ static int mas_next_node(struct ma_state *mas, struct maple_node *node, mas->min = min; return 0; -no_entry: +overflow: if (unlikely(ma_dead_node(node))) return 1; - mas->node = MAS_NONE; + mas->status = ma_overflow; return 0; } @@ -4624,8 +4639,7 @@ no_entry: * * Return: The entry in the next slot which is possibly NULL */ -static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty, - bool set_overflow) +static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty) { void __rcu **slots; unsigned long *pivots; @@ -4646,13 +4660,15 @@ retry: if (likely(mas->offset < mas->end)) pivot = pivots[mas->offset]; else - goto overflow; + pivot = mas->max; if (unlikely(mas_rewalk_if_dead(mas, node, save_point))) goto retry; - if (pivot >= max) - goto overflow; + if (pivot >= max) { /* Was at the limit, next will extend beyond */ + mas->status = ma_overflow; + return NULL; + } } if (likely(mas->offset < mas->end)) { @@ -4664,16 +4680,18 @@ again: else mas->last = mas->max; } else { + if (mas->last >= max) { + mas->status = ma_overflow; + return NULL; + } + if (mas_next_node(mas, node, max)) { mas_rewalk(mas, save_point); goto retry; } - if (WARN_ON_ONCE(mas_is_none(mas))) { - mas->node = MAS_OVERFLOW; + if (WARN_ON_ONCE(mas_is_overflow(mas))) return NULL; - goto overflow; - } mas->offset = 0; mas->index = mas->min; @@ -4691,20 +4709,18 @@ again: if (entry) return entry; + if (!empty) { - if (mas->last >= max) - goto overflow; + if (mas->last >= max) { + mas->status = ma_overflow; + return NULL; + } mas->index = mas->last + 1; goto again; } return entry; - -overflow: - if (set_overflow) - mas->node = MAS_OVERFLOW; - return NULL; } /* @@ -4723,11 +4739,11 @@ overflow: static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit) { if (mas->last >= limit) { - mas->node = MAS_OVERFLOW; + mas->status = ma_overflow; return NULL; } - return mas_next_slot(mas, limit, false, true); + return mas_next_slot(mas, limit, false); } /* @@ -4895,7 +4911,7 @@ done: * @mas: The maple state. * * mas->index and mas->last will be set to the range if there is a value. If - * mas->node is MAS_NONE, reset to MAS_START. + * mas->status is ma_none, reset to ma_start * * Return: the entry at the location or %NULL. */ @@ -4904,7 +4920,7 @@ void *mas_walk(struct ma_state *mas) void *entry; if (!mas_is_active(mas) || !mas_is_start(mas)) - mas->node = MAS_START; + mas->status = ma_start; retry: entry = mas_state_walk(mas); if (mas_is_start(mas)) { @@ -4920,7 +4936,7 @@ retry: mas->index = 1; mas->last = ULONG_MAX; - mas->node = MAS_NONE; + mas->status = ma_none; return NULL; } @@ -5672,27 +5688,40 @@ static bool mas_next_setup(struct ma_state *mas, unsigned long max, bool was_none = mas_is_none(mas); if (unlikely(mas->last >= max)) { - mas->node = MAS_OVERFLOW; + mas->status = ma_overflow; return true; } - if (mas_is_active(mas)) + switch (mas->status) { + case ma_active: return false; - - if (mas_is_none(mas) || mas_is_paused(mas)) { - mas->node = MAS_START; - } else if (mas_is_overflow(mas)) { + case ma_none: + fallthrough; + case ma_pause: + mas->status = ma_start; + fallthrough; + case ma_start: + mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ + break; + case ma_overflow: /* Overflowed before, but the max changed */ - mas->node = MAS_START; - } else if (mas_is_underflow(mas)) { - mas->node = MAS_START; + mas->status = ma_active; + break; + case ma_underflow: + /* The user expects the mas to be one before where it is */ + mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; + break; + case ma_root: + break; + case ma_error: + return true; } - if (mas_is_start(mas)) - *entry = mas_walk(mas); /* Retries on dead nodes handled by mas_walk */ + if (likely(mas_is_active(mas))) /* Fast path */ + return false; if (mas_is_ptr(mas)) { *entry = NULL; @@ -5702,7 +5731,7 @@ static bool mas_next_setup(struct ma_state *mas, unsigned long max, } mas->index = 1; mas->last = ULONG_MAX; - mas->node = MAS_NONE; + mas->status = ma_none; return true; } @@ -5731,7 +5760,7 @@ void *mas_next(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false, true); + return mas_next_slot(mas, max, false); } EXPORT_SYMBOL_GPL(mas_next); @@ -5754,7 +5783,7 @@ void *mas_next_range(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true, true); + return mas_next_slot(mas, max, true); } EXPORT_SYMBOL_GPL(mas_next_range); @@ -5785,33 +5814,45 @@ EXPORT_SYMBOL_GPL(mt_next); static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry) { if (unlikely(mas->index <= min)) { - mas->node = MAS_UNDERFLOW; + mas->status = ma_underflow; return true; } - if (mas_is_active(mas)) + switch (mas->status) { + case ma_active: return false; - - if (mas_is_overflow(mas)) { - mas->node = MAS_START; + case ma_start: + break; + case ma_none: + fallthrough; + case ma_pause: + mas->status = ma_start; + break; + case ma_underflow: + /* underflowed before but the min changed */ + mas->status = ma_active; + break; + case ma_overflow: + /* User expects mas to be one after where it is */ + mas->status = ma_active; *entry = mas_walk(mas); if (*entry) return true; - } - - if (mas_is_none(mas) || mas_is_paused(mas)) { - mas->node = MAS_START; - } else if (mas_is_underflow(mas)) { - /* underflowed before but the min changed */ - mas->node = MAS_START; + break; + case ma_root: + break; + case ma_error: + return true; } if (mas_is_start(mas)) mas_walk(mas); if (unlikely(mas_is_ptr(mas))) { - if (!mas->index) - goto none; + if (!mas->index) { + mas->status = ma_none; + return true; + } mas->index = mas->last = 0; *entry = mas_root(mas); return true; @@ -5821,7 +5862,7 @@ static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry if (mas->index) { /* Walked to out-of-range pointer? */ mas->index = mas->last = 0; - mas->node = MAS_ROOT; + mas->status = ma_root; *entry = mas_root(mas); return true; } @@ -5829,10 +5870,6 @@ static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry } return false; - -none: - mas->node = MAS_NONE; - return true; } /** @@ -5841,7 +5878,7 @@ none: * @min: The minimum value to check. * * Must hold rcu_read_lock or the write lock. - * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not + * Will reset mas to ma_start if the status is ma_none. Will stop on not * searchable nodes. * * Return: the previous value or %NULL. @@ -5853,7 +5890,7 @@ void *mas_prev(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, false, true); + return mas_prev_slot(mas, min, false); } EXPORT_SYMBOL_GPL(mas_prev); @@ -5864,7 +5901,7 @@ EXPORT_SYMBOL_GPL(mas_prev); * * Sets @mas->index and @mas->last to the range. * Must hold rcu_read_lock or the write lock. - * Will reset mas to MAS_START if the node is MAS_NONE. Will stop on not + * Will reset mas to ma_start if the node is ma_none. Will stop on not * searchable nodes. * * Return: the previous value or %NULL. @@ -5876,7 +5913,7 @@ void *mas_prev_range(struct ma_state *mas, unsigned long min) if (mas_prev_setup(mas, min, &entry)) return entry; - return mas_prev_slot(mas, min, true, true); + return mas_prev_slot(mas, min, true); } EXPORT_SYMBOL_GPL(mas_prev_range); @@ -5919,7 +5956,8 @@ EXPORT_SYMBOL_GPL(mt_prev); */ void mas_pause(struct ma_state *mas) { - mas->node = MAS_PAUSE; + mas->status = ma_pause; + mas->node = NULL; } EXPORT_SYMBOL_GPL(mas_pause); @@ -5933,32 +5971,52 @@ EXPORT_SYMBOL_GPL(mas_pause); */ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry) { - if (mas_is_active(mas)) { + switch (mas->status) { + case ma_active: if (mas->last < max) return false; - return true; - } - - if (mas_is_paused(mas)) { + case ma_start: + break; + case ma_pause: if (unlikely(mas->last >= max)) return true; mas->index = ++mas->last; - mas->node = MAS_START; - } else if (mas_is_none(mas)) { + mas->status = ma_start; + break; + case ma_none: if (unlikely(mas->last >= max)) return true; mas->index = mas->last; - mas->node = MAS_START; - } else if (mas_is_overflow(mas) || mas_is_underflow(mas)) { - if (mas->index > max) { - mas->node = MAS_OVERFLOW; + mas->status = ma_start; + break; + case ma_underflow: + /* mas is pointing at entry before unable to go lower */ + if (unlikely(mas->index >= max)) { + mas->status = ma_overflow; return true; } - mas->node = MAS_START; + mas->status = ma_active; + *entry = mas_walk(mas); + if (*entry) + return true; + break; + case ma_overflow: + if (unlikely(mas->last >= max)) + return true; + + mas->status = ma_active; + *entry = mas_walk(mas); + if (*entry) + return true; + break; + case ma_root: + break; + case ma_error: + return true; } if (mas_is_start(mas)) { @@ -5985,7 +6043,7 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m return false; ptr_out_of_range: - mas->node = MAS_NONE; + mas->status = ma_none; mas->index = 1; mas->last = ULONG_MAX; return true; @@ -5999,7 +6057,7 @@ ptr_out_of_range: * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. - * May set @mas->node to MAS_NONE. + * May set @mas->status to ma_overflow. * * Return: The entry or %NULL. */ @@ -6011,7 +6069,10 @@ void *mas_find(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, false, false); + entry = mas_next_slot(mas, max, false); + /* Ignore overflow */ + mas->status = ma_active; + return entry; } EXPORT_SYMBOL_GPL(mas_find); @@ -6023,7 +6084,7 @@ EXPORT_SYMBOL_GPL(mas_find); * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. - * May set @mas->node to MAS_NONE. + * May set @mas->status to ma_overflow. * * Return: The entry or %NULL. */ @@ -6035,7 +6096,7 @@ void *mas_find_range(struct ma_state *mas, unsigned long max) return entry; /* Retries on dead nodes handled by mas_next_slot */ - return mas_next_slot(mas, max, true, false); + return mas_next_slot(mas, max, true); } EXPORT_SYMBOL_GPL(mas_find_range); @@ -6050,33 +6111,45 @@ EXPORT_SYMBOL_GPL(mas_find_range); static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, void **entry) { - if (mas_is_active(mas)) { - if (mas->index > min) - return false; - - return true; - } - if (mas_is_paused(mas)) { + switch (mas->status) { + case ma_active: + goto active; + case ma_start: + break; + case ma_pause: if (unlikely(mas->index <= min)) { - mas->node = MAS_NONE; + mas->status = ma_underflow; return true; } - mas->node = MAS_START; mas->last = --mas->index; - } else if (mas_is_none(mas)) { + mas->status = ma_start; + break; + case ma_none: if (mas->index <= min) goto none; mas->last = mas->index; - mas->node = MAS_START; - } else if (mas_is_underflow(mas) || mas_is_overflow(mas)) { - if (mas->last <= min) { - mas->node = MAS_UNDERFLOW; + mas->status = ma_start; + break; + case ma_overflow: /* user expects the mas to be one after where it is */ + if (unlikely(mas->index <= min)) { + mas->status = ma_underflow; return true; } - mas->node = MAS_START; + mas->status = ma_active; + break; + case ma_underflow: /* user expects the mas to be one before where it is */ + if (unlikely(mas->index <= min)) + return true; + + mas->status = ma_active; + break; + case ma_root: + break; + case ma_error: + return true; } if (mas_is_start(mas)) { @@ -6099,19 +6172,20 @@ static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, * previous location is 0. */ mas->last = mas->index = 0; - mas->node = MAS_ROOT; + mas->status = ma_root; *entry = mas_root(mas); return true; } } +active: if (mas->index < min) return true; return false; none: - mas->node = MAS_NONE; + mas->status = ma_none; return true; } @@ -6124,7 +6198,7 @@ none: * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. - * May set @mas->node to MAS_NONE. + * May set @mas->status to ma_underflow. * * Return: The entry or %NULL. */ @@ -6136,7 +6210,7 @@ void *mas_find_rev(struct ma_state *mas, unsigned long min) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, false, false); + return mas_prev_slot(mas, min, false); } EXPORT_SYMBOL_GPL(mas_find_rev); @@ -6150,7 +6224,7 @@ EXPORT_SYMBOL_GPL(mas_find_rev); * * Must hold rcu_read_lock or the write lock. * If an entry exists, last and index are updated accordingly. - * May set @mas->node to MAS_NONE. + * May set @mas->status to ma_underflow. * * Return: The entry or %NULL. */ @@ -6162,7 +6236,7 @@ void *mas_find_range_rev(struct ma_state *mas, unsigned long min) return entry; /* Retries on dead nodes handled by mas_prev_slot */ - return mas_prev_slot(mas, min, true, false); + return mas_prev_slot(mas, min, true); } EXPORT_SYMBOL_GPL(mas_find_range_rev); @@ -6183,7 +6257,7 @@ void *mas_erase(struct ma_state *mas) MA_WR_STATE(wr_mas, mas, NULL); if (!mas_is_active(mas) || !mas_is_start(mas)) - mas->node = MAS_START; + mas->status = ma_start; /* Retry unnecessary when holding the write lock. */ entry = mas_state_walk(mas); @@ -6228,7 +6302,7 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) if (!mas_allocated(mas)) return false; - mas->node = MAS_START; + mas->status = ma_start; return true; } @@ -6627,7 +6701,7 @@ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, node = mt_alloc_one(gfp); if (!node) { - new_mas->node = MAS_NONE; + new_mas->status = ma_none; mas_set_err(mas, -ENOMEM); return; } @@ -6971,11 +7045,11 @@ static inline struct maple_enode *mas_get_slot(struct ma_state *mas, static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) { - struct maple_enode *p = MAS_NONE, *mn = mas->node; + struct maple_enode *p, *mn = mas->node; unsigned long p_min, p_max; mas_next_node(mas, mas_mn(mas), max); - if (!mas_is_none(mas)) + if (!mas_is_overflow(mas)) return; if (mte_is_root(mn)) @@ -6988,7 +7062,7 @@ static void mas_dfs_postorder(struct ma_state *mas, unsigned long max) p_min = mas->min; p_max = mas->max; mas_prev_node(mas, 0); - } while (!mas_is_none(mas)); + } while (!mas_is_underflow(mas)); mas->node = p; mas->max = p_max; @@ -7443,7 +7517,7 @@ static void mt_validate_nulls(struct maple_tree *mt) MA_STATE(mas, mt, 0, 0); mas_start(&mas); - if (mas_is_none(&mas) || (mas.node == MAS_ROOT)) + if (mas_is_none(&mas) || (mas_is_ptr(&mas))) return; while (!mte_is_leaf(mas.node)) @@ -7460,7 +7534,7 @@ static void mt_validate_nulls(struct maple_tree *mt) last = entry; if (offset == mas_data_end(&mas)) { mas_next_node(&mas, mas_mn(&mas), ULONG_MAX); - if (mas_is_none(&mas)) + if (mas_is_overflow(&mas)) return; offset = 0; slots = ma_slots(mte_to_node(mas.node), @@ -7469,7 +7543,7 @@ static void mt_validate_nulls(struct maple_tree *mt) offset++; } - } while (!mas_is_none(&mas)); + } while (!mas_is_overflow(&mas)); } /* @@ -7490,7 +7564,7 @@ void mt_validate(struct maple_tree *mt) while (!mte_is_leaf(mas.node)) mas_descend(&mas); - while (!mas_is_none(&mas)) { + while (!mas_is_overflow(&mas)) { MAS_WARN_ON(&mas, mte_dead_node(mas.node)); end = mas_data_end(&mas); if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) && @@ -7515,16 +7589,35 @@ EXPORT_SYMBOL_GPL(mt_validate); void mas_dump(const struct ma_state *mas) { pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node); - if (mas_is_none(mas)) - pr_err("(MAS_NONE) "); - else if (mas_is_ptr(mas)) - pr_err("(MAS_ROOT) "); - else if (mas_is_start(mas)) - pr_err("(MAS_START) "); - else if (mas_is_paused(mas)) - pr_err("(MAS_PAUSED) "); - - pr_err("[%u] index=%lx last=%lx\n", mas->offset, mas->index, mas->last); + switch (mas->status) { + case ma_active: + pr_err("(ma_active)"); + break; + case ma_none: + pr_err("(ma_none)"); + break; + case ma_root: + pr_err("(ma_root)"); + break; + case ma_start: + pr_err("(ma_start) "); + break; + case ma_pause: + pr_err("(ma_pause) "); + break; + case ma_overflow: + pr_err("(ma_overflow) "); + break; + case ma_underflow: + pr_err("(ma_underflow) "); + break; + case ma_error: + pr_err("(ma_error) "); + break; + } + + pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, + mas->index, mas->last); pr_err(" min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n", mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); if (mas->index > mas->last) diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index 3e4597fb49d3..e7a5d688c9e0 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -54,6 +54,11 @@ atomic_t maple_tree_tests_passed; #else #define cond_resched() do {} while (0) #endif + +#define mas_is_none(x) ((x)->status == ma_none) +#define mas_is_overflow(x) ((x)->status == ma_overflow) +#define mas_is_underflow(x) ((x)->status == ma_underflow) + static int __init mtree_insert_index(struct maple_tree *mt, unsigned long index, gfp_t gfp) { @@ -582,7 +587,7 @@ static noinline void __init check_find(struct maple_tree *mt) MT_BUG_ON(mt, last != mas.last); - mas.node = MAS_NONE; + mas.status = ma_none; mas.index = ULONG_MAX; mas.last = ULONG_MAX; entry2 = mas_prev(&mas, 0); @@ -2178,7 +2183,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt) MT_BUG_ON(mt, val != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 5); - MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); mas.index = 0; mas.last = 5; @@ -3042,10 +3047,6 @@ static noinline void __init check_empty_area_fill(struct maple_tree *mt) * DNE active active range of NULL */ -#define mas_active(x) (((x).node != MAS_ROOT) && \ - ((x).node != MAS_START) && \ - ((x).node != MAS_PAUSE) && \ - ((x).node != MAS_NONE)) static noinline void __init check_state_handling(struct maple_tree *mt) { MA_STATE(mas, mt, 0, 0); @@ -3060,7 +3061,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) /* prev: Start -> underflow*/ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + MT_BUG_ON(mt, mas.status != ma_underflow); /* prev: Start -> root */ mas_set(&mas, 10); @@ -3068,7 +3069,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* prev: pause -> root */ mas_set(&mas, 10); @@ -3077,7 +3078,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* next: start -> none */ mas_set(&mas, 0); @@ -3085,7 +3086,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* next: start -> none*/ mas_set(&mas, 10); @@ -3093,7 +3094,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find: start -> root */ mas_set(&mas, 0); @@ -3101,21 +3102,21 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* find: root -> none */ entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find: none -> none */ entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find: start -> none */ mas_set(&mas, 10); @@ -3123,14 +3124,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find_rev: none -> root */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* find_rev: start -> root */ mas_set(&mas, 0); @@ -3138,21 +3139,21 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* find_rev: root -> none */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find_rev: none -> none */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* find_rev: start -> root */ mas_set(&mas, 10); @@ -3160,7 +3161,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* walk: start -> none */ mas_set(&mas, 10); @@ -3168,7 +3169,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* walk: pause -> none*/ mas_set(&mas, 10); @@ -3177,7 +3178,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* walk: none -> none */ mas.index = mas.last = 10; @@ -3185,14 +3186,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* walk: none -> none */ entry = mas_walk(&mas); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* walk: start -> root */ mas_set(&mas, 0); @@ -3200,7 +3201,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* walk: pause -> root */ mas_set(&mas, 0); @@ -3209,22 +3210,22 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* walk: none -> root */ - mas.node = MAS_NONE; + mas.status = ma_none; entry = mas_walk(&mas); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* walk: root -> root */ entry = mas_walk(&mas); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); /* walk: root -> none */ mas_set(&mas, 10); @@ -3232,7 +3233,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 1); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_NONE); + MT_BUG_ON(mt, mas.status != ma_none); /* walk: none -> root */ mas.index = mas.last = 0; @@ -3240,7 +3241,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0); - MT_BUG_ON(mt, mas.node != MAS_ROOT); + MT_BUG_ON(mt, mas.status != ma_root); mas_unlock(&mas); @@ -3258,7 +3259,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* next: pause ->active */ mas_set(&mas, 0); @@ -3267,126 +3268,132 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* next: none ->active */ mas.index = mas.last = 0; mas.offset = 0; - mas.node = MAS_NONE; + mas.status = ma_none; entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* next:active ->active */ - entry = mas_next(&mas, ULONG_MAX); + /* next:active ->active (spanning limit) */ + entry = mas_next(&mas, 0x2100); MT_BUG_ON(mt, entry != ptr2); MT_BUG_ON(mt, mas.index != 0x2000); MT_BUG_ON(mt, mas.last != 0x2500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* next:active -> active beyond data */ + /* next:active -> overflow (limit reached) beyond data */ entry = mas_next(&mas, 0x2999); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x2501); MT_BUG_ON(mt, mas.last != 0x2fff); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_overflow(&mas)); - /* Continue after last range ends after max */ + /* next:overflow -> active (limit changed) */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* next:active -> active continued */ + /* next:active -> overflow (limit reached) */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, !mas_active(mas)); - - /* next:active -> overflow */ - entry = mas_next(&mas, ULONG_MAX); - MT_BUG_ON(mt, entry != NULL); - MT_BUG_ON(mt, mas.index != 0x3501); - MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + MT_BUG_ON(mt, !mas_is_overflow(&mas)); /* next:overflow -> overflow */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, mas.node != MAS_OVERFLOW); + MT_BUG_ON(mt, !mas_is_overflow(&mas)); /* prev:overflow -> active */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* next: none -> active, skip value at location */ mas_set(&mas, 0); entry = mas_next(&mas, ULONG_MAX); - mas.node = MAS_NONE; + mas.status = ma_none; mas.offset = 0; entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr2); MT_BUG_ON(mt, mas.index != 0x2000); MT_BUG_ON(mt, mas.last != 0x2500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* prev:active ->active */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* prev:active -> active spanning end range */ + /* prev:active -> underflow (span limit) */ + mas_next(&mas, ULONG_MAX); + entry = mas_prev(&mas, 0x1200); + MT_BUG_ON(mt, entry != ptr); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* spanning limit */ + entry = mas_prev(&mas, 0x1200); /* underflow */ + MT_BUG_ON(mt, entry != NULL); + MT_BUG_ON(mt, mas.index != 0x1000); + MT_BUG_ON(mt, mas.last != 0x1500); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); + + /* prev:underflow -> underflow (lower limit) spanning end range */ entry = mas_prev(&mas, 0x0100); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); - /* prev:active -> underflow */ + /* prev:underflow -> underflow */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); - MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); /* prev:underflow -> underflow */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); - MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); /* next:underflow -> active */ entry = mas_next(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* prev:first value -> underflow */ entry = mas_prev(&mas, 0x1000); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, mas.node != MAS_UNDERFLOW); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); /* find:underflow -> first value */ entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* prev: pause ->active */ mas_set(&mas, 0x3600); @@ -3397,21 +3404,21 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr2); MT_BUG_ON(mt, mas.index != 0x2000); MT_BUG_ON(mt, mas.last != 0x2500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* prev:active -> active spanning min */ + /* prev:active -> underflow spanning min */ entry = mas_prev(&mas, 0x1600); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1FFF); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); /* prev: active ->active, continue */ entry = mas_prev(&mas, 0); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find: start ->active */ mas_set(&mas, 0); @@ -3419,7 +3426,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find: pause ->active */ mas_set(&mas, 0); @@ -3428,7 +3435,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find: start ->active on value */; mas_set(&mas, 1200); @@ -3436,14 +3443,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find:active ->active */ entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr2); MT_BUG_ON(mt, mas.index != 0x2000); MT_BUG_ON(mt, mas.last != 0x2500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find:active -> active (NULL)*/ @@ -3451,35 +3458,35 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x2501); MT_BUG_ON(mt, mas.last != 0x2FFF); - MT_BUG_ON(mt, !mas_active(mas)); + MAS_BUG_ON(&mas, !mas_is_active(&mas)); /* find: overflow ->active */ entry = mas_find(&mas, 0x5000); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find:active -> active (NULL) end*/ entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x3501); MT_BUG_ON(mt, mas.last != ULONG_MAX); - MT_BUG_ON(mt, !mas_active(mas)); + MAS_BUG_ON(&mas, !mas_is_active(&mas)); /* find_rev: active (END) ->active */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != ptr3); MT_BUG_ON(mt, mas.index != 0x3000); MT_BUG_ON(mt, mas.last != 0x3500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find_rev:active ->active */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != ptr2); MT_BUG_ON(mt, mas.index != 0x2000); MT_BUG_ON(mt, mas.last != 0x2500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* find_rev: pause ->active */ mas_pause(&mas); @@ -3487,14 +3494,14 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); - /* find_rev:active -> active */ + /* find_rev:active -> underflow */ entry = mas_find_rev(&mas, 0); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0); MT_BUG_ON(mt, mas.last != 0x0FFF); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_underflow(&mas)); /* find_rev: start ->active */ mas_set(&mas, 0x1200); @@ -3502,7 +3509,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk start ->active */ mas_set(&mas, 0x1200); @@ -3510,7 +3517,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk start ->active */ mas_set(&mas, 0x1600); @@ -3518,7 +3525,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1fff); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk pause ->active */ mas_set(&mas, 0x1200); @@ -3527,7 +3534,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk pause -> active */ mas_set(&mas, 0x1600); @@ -3536,25 +3543,25 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1fff); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk none -> active */ mas_set(&mas, 0x1200); - mas.node = MAS_NONE; + mas.status = ma_none; entry = mas_walk(&mas); MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk none -> active */ mas_set(&mas, 0x1600); - mas.node = MAS_NONE; + mas.status = ma_none; entry = mas_walk(&mas); MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1fff); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk active -> active */ mas.index = 0x1200; @@ -3564,7 +3571,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != ptr); MT_BUG_ON(mt, mas.index != 0x1000); MT_BUG_ON(mt, mas.last != 0x1500); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); /* mas_walk active -> active */ mas.index = 0x1600; @@ -3573,7 +3580,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, entry != NULL); MT_BUG_ON(mt, mas.index != 0x1501); MT_BUG_ON(mt, mas.last != 0x1fff); - MT_BUG_ON(mt, !mas_active(mas)); + MT_BUG_ON(mt, !mas_is_active(&mas)); mas_unlock(&mas); } diff --git a/mm/internal.h b/mm/internal.h index 0005b8adbd5c..8450562744cf 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1163,13 +1163,13 @@ static inline void vma_iter_store(struct vma_iterator *vmi, { #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) - if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && + if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.index > vma->vm_start)) { pr_warn("%lx > %lx\n store vma %lx-%lx\n into slot %lx-%lx\n", vmi->mas.index, vma->vm_start, vma->vm_start, vma->vm_end, vmi->mas.index, vmi->mas.last); } - if (MAS_WARN_ON(&vmi->mas, vmi->mas.node != MAS_START && + if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start && vmi->mas.last < vma->vm_start)) { pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n", vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end, @@ -1177,7 +1177,7 @@ static inline void vma_iter_store(struct vma_iterator *vmi, } #endif - if (vmi->mas.node != MAS_START && + if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); @@ -1188,7 +1188,7 @@ static inline void vma_iter_store(struct vma_iterator *vmi, static inline int vma_iter_store_gfp(struct vma_iterator *vmi, struct vm_area_struct *vma, gfp_t gfp) { - if (vmi->mas.node != MAS_START && + if (vmi->mas.status != ma_start && ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start))) vma_iter_invalidate(vmi); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 7095fb0ec026..857c439e6bbc 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -118,6 +118,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.alloc == NULL); MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); mas_push_node(&mas, mn); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* free */ mtree_unlock(mt); @@ -141,7 +142,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - mas.node = MAS_START; + mas.status = ma_start; mas_nomem(&mas, GFP_KERNEL); /* Allocate 3 nodes, will fail. */ mas_node_count(&mas, 3); @@ -158,6 +159,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) /* Ensure we counted 3. */ MT_BUG_ON(mt, mas_allocated(&mas) != 3); /* Free. */ + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Set allocation request to 1. */ @@ -272,6 +274,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) ma_free_rcu(mn); MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); } + mas_reset(&mas); MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); } @@ -294,6 +297,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) smn = smn->slot[0]; /* next. */ } MT_BUG_ON(mt, mas_allocated(&mas) != total); + mas_reset(&mas); mas_nomem(&mas, GFP_KERNEL); /* Free. */ MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -441,7 +445,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10); mas_destroy(&mas); @@ -452,7 +456,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mas.node = MA_ERROR(-ENOMEM); mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */ mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.node = MAS_START; + mas.status = ma_start; MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); @@ -941,7 +945,7 @@ retry: ret = mas_descend_walk(mas, range_min, range_max); if (unlikely(mte_dead_node(mas->node))) { - mas->node = MAS_START; + mas->status = ma_start; goto retry; } @@ -961,10 +965,10 @@ static inline void *mas_range_load(struct ma_state *mas, unsigned long index = mas->index; if (mas_is_none(mas) || mas_is_paused(mas)) - mas->node = MAS_START; + mas->status = ma_start; retry: if (mas_tree_walk(mas, range_min, range_max)) - if (unlikely(mas->node == MAS_ROOT)) + if (unlikely(mas->status == ma_root)) return mas_root(mas); if (likely(mas->offset != MAPLE_NODE_SLOTS)) @@ -35337,7 +35341,7 @@ static void mas_dfs_preorder(struct ma_state *mas) unsigned char end, slot = 0; unsigned long *pivots; - if (mas->node == MAS_START) { + if (mas->status == ma_start) { mas_start(mas); return; } @@ -35374,7 +35378,7 @@ walk_up: return; done: - mas->node = MAS_NONE; + mas->status = ma_none; } @@ -35833,7 +35837,7 @@ static noinline void __init check_nomem(struct maple_tree *mt) mas_store(&ms, &ms); /* insert 1 -> &ms, fails. */ MT_BUG_ON(mt, ms.node != MA_ERROR(-ENOMEM)); mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */ - MT_BUG_ON(mt, ms.node != MAS_START); + MT_BUG_ON(mt, ms.status != ma_start); mtree_unlock(mt); MT_BUG_ON(mt, mtree_insert(mt, 2, mt, GFP_KERNEL) != 0); mtree_lock(mt); @@ -35952,7 +35956,7 @@ static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) if (mas_is_ptr(&mas_a) || mas_is_ptr(&mas_b)) { if (!(mas_is_ptr(&mas_a) && mas_is_ptr(&mas_b))) { - pr_err("One is MAS_ROOT and the other is not.\n"); + pr_err("One is ma_root and the other is not.\n"); return -1; } return 0; @@ -35961,7 +35965,7 @@ static int __init compare_tree(struct maple_tree *mt_a, struct maple_tree *mt_b) while (!mas_is_none(&mas_a) || !mas_is_none(&mas_b)) { if (mas_is_none(&mas_a) || mas_is_none(&mas_b)) { - pr_err("One is MAS_NONE and the other is not.\n"); + pr_err("One is ma_none and the other is not.\n"); return -1; } -- cgit v1.2.3 From 9a40d45c1f2c49273c04938ec3d7849f685eb3c1 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 1 Nov 2023 13:16:26 -0400 Subject: maple_tree: remove mas_searchable() Now that the status of the maple state is outside of the node, the mas_searchable() function can be dropped for easier open-coding of what is going on. Link: https://lkml.kernel.org/r/20231101171629.3612299-10-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Cc: Peng Zhang Signed-off-by: Andrew Morton --- lib/maple_tree.c | 66 ++++++++++------------------------------ tools/testing/radix-tree/maple.c | 4 ++- 2 files changed, 19 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 187a9796188e..c7016066f12b 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -285,17 +285,6 @@ static inline bool mas_is_underflow(struct ma_state *mas) return mas->status == ma_underflow; } -static inline bool mas_searchable(struct ma_state *mas) -{ - if (mas_is_none(mas)) - return false; - - if (mas_is_ptr(mas)) - return false; - - return true; -} - static __always_inline struct maple_node *mte_to_node( const struct maple_enode *entry) { @@ -6030,12 +6019,11 @@ static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long m } - if (unlikely(!mas_searchable(mas))) { - if (unlikely(mas_is_ptr(mas))) - goto ptr_out_of_range; + if (unlikely(mas_is_ptr(mas))) + goto ptr_out_of_range; + if (unlikely(mas_is_none(mas))) return true; - } if (mas->index == max) return true; @@ -6162,20 +6150,18 @@ static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min, return true; } - if (unlikely(!mas_searchable(mas))) { - if (mas_is_ptr(mas)) - goto none; + if (unlikely(mas_is_ptr(mas))) + goto none; - if (mas_is_none(mas)) { - /* - * Walked to the location, and there was nothing so the - * previous location is 0. - */ - mas->last = mas->index = 0; - mas->status = ma_root; - *entry = mas_root(mas); - return true; - } + if (unlikely(mas_is_none(mas))) { + /* + * Walked to the location, and there was nothing so the previous + * location is 0. + */ + mas->last = mas->index = 0; + mas->status = ma_root; + *entry = mas_root(mas); + return true; } active: @@ -6905,7 +6891,7 @@ retry: if (entry) goto unlock; - while (mas_searchable(&mas) && (mas.last < max)) { + while (mas_is_active(&mas) && (mas.last < max)) { entry = mas_next_entry(&mas, max); if (likely(entry && !xa_is_zero(entry))) break; @@ -6987,26 +6973,6 @@ unsigned int mt_nr_allocated(void) return kmem_cache_nr_allocated(maple_node_cache); } -/* - * mas_dead_node() - Check if the maple state is pointing to a dead node. - * @mas: The maple state - * @index: The index to restore in @mas. - * - * Used in test code. - * Return: 1 if @mas has been reset to MAS_START, 0 otherwise. - */ -static inline int mas_dead_node(struct ma_state *mas, unsigned long index) -{ - if (unlikely(!mas_searchable(mas) || mas_is_start(mas))) - return 0; - - if (likely(!mte_dead_node(mas->node))) - return 0; - - mas_rewalk(mas, index); - return 1; -} - void mt_cache_shrink(void) { } @@ -7558,7 +7524,7 @@ void mt_validate(struct maple_tree *mt) MA_STATE(mas, mt, 0, 0); rcu_read_lock(); mas_start(&mas); - if (!mas_searchable(&mas)) + if (!mas_is_active(&mas)) goto done; while (!mte_is_leaf(mas.node)) diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 857c439e6bbc..56ae47291ee0 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -974,8 +974,10 @@ retry: if (likely(mas->offset != MAPLE_NODE_SLOTS)) entry = mas_get_slot(mas, mas->offset); - if (mas_dead_node(mas, index)) + if (mas_is_active(mas) && mte_dead_node(mas->node)) { + mas_set(mas, index); goto retry; + } return entry; } -- cgit v1.2.3 From a697dc2be925d4814f26d7588347ccdd2c5525ed Mon Sep 17 00:00:00 2001 From: Domenico Cerasuolo Date: Thu, 30 Nov 2023 11:40:22 -0800 Subject: selftests: cgroup: update per-memcg zswap writeback selftest The memcg-zswap self test is updated to adjust to the behavior change implemented by commit 87730b165089 ("zswap: make shrinking memcg-aware"), where zswap performs writeback for specific memcg. Link: https://lkml.kernel.org/r/20231130194023.4102148-6-nphamcs@gmail.com Signed-off-by: Domenico Cerasuolo Signed-off-by: Nhat Pham Tested-by: Bagas Sanjaya Acked-by: Chris Li (Google) Cc: Dan Streetman Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Seth Jennings Cc: Shakeel Butt Cc: Shuah Khan Cc: Vitaly Wool Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_zswap.c | 74 +++++++++++++++++++---------- 1 file changed, 50 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index c99d2adaca3f..47fdaa146443 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -50,9 +50,9 @@ static int get_zswap_stored_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/stored_pages", value); } -static int get_zswap_written_back_pages(size_t *value) +static int get_cg_wb_count(const char *cg) { - return read_int("/sys/kernel/debug/zswap/written_back_pages", value); + return cg_read_key_long(cg, "memory.stat", "zswp_wb"); } static long get_zswpout(const char *cgroup) @@ -73,6 +73,24 @@ static int allocate_bytes(const char *cgroup, void *arg) return 0; } +static char *setup_test_group_1M(const char *root, const char *name) +{ + char *group_name = cg_name(root, name); + + if (!group_name) + return NULL; + if (cg_create(group_name)) + goto fail; + if (cg_write(group_name, "memory.max", "1M")) { + cg_destroy(group_name); + goto fail; + } + return group_name; +fail: + free(group_name); + return NULL; +} + /* * Sanity test to check that pages are written into zswap. */ @@ -117,43 +135,51 @@ out: /* * When trying to store a memcg page in zswap, if the memcg hits its memory - * limit in zswap, writeback should not be triggered. - * - * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may - * not zswap"). Needs to be revised when a per memcg writeback mechanism is - * implemented. + * limit in zswap, writeback should affect only the zswapped pages of that + * memcg. */ static int test_no_invasive_cgroup_shrink(const char *root) { - size_t written_back_before, written_back_after; int ret = KSFT_FAIL; - char *test_group; + size_t control_allocation_size = MB(10); + char *control_allocation, *wb_group = NULL, *control_group = NULL; /* Set up */ - test_group = cg_name(root, "no_shrink_test"); - if (!test_group) - goto out; - if (cg_create(test_group)) + wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); + if (!wb_group) + return KSFT_FAIL; + if (cg_write(wb_group, "memory.zswap.max", "10K")) goto out; - if (cg_write(test_group, "memory.max", "1M")) + control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); + if (!control_group) goto out; - if (cg_write(test_group, "memory.zswap.max", "10K")) + + /* Push some test_group2 memory into zswap */ + if (cg_enter_current(control_group)) goto out; - if (get_zswap_written_back_pages(&written_back_before)) + control_allocation = malloc(control_allocation_size); + for (int i = 0; i < control_allocation_size; i += 4095) + control_allocation[i] = 'a'; + if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) goto out; - /* Allocate 10x memory.max to push memory into zswap */ - if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ + if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) goto out; - /* Verify that no writeback happened because of the memcg allocation */ - if (get_zswap_written_back_pages(&written_back_after)) - goto out; - if (written_back_after == written_back_before) + /* Verify that only zswapped memory from gwb_group has been written back */ + if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) ret = KSFT_PASS; out: - cg_destroy(test_group); - free(test_group); + cg_enter_current(root); + if (control_group) { + cg_destroy(control_group); + free(control_group); + } + cg_destroy(wb_group); + free(wb_group); + if (control_allocation) + free(control_allocation); return ret; } -- cgit v1.2.3 From 4b86316ef18231109e4ebb3661ffc69d816fb56f Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Wed, 29 Nov 2023 15:11:40 -0700 Subject: selftests/mm: dont run ksm_functional_tests twice ksm functional test is already being run. Remove the duplicate call to ./ksm_functional_tests. Link: https://lkml.kernel.org/r/20231129221140.614713-1-npache@redhat.com Fixes: 93fb70aa5904 ("selftests/vm: add KSM unmerge tests") Signed-off-by: Nico Pache Acked-by: Joel Savitz Cc: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/run_vmtests.sh | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 00757445278e..c0212258b852 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -334,8 +334,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests -run_test ./ksm_functional_tests - # protection_keys tests if [ -x ./protection_keys_32 ] then -- cgit v1.2.3 From 3649caed1c9b7aa57049620c498596c17fc7af9e Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 30 Nov 2023 02:36:49 +0000 Subject: selftests/damon: test quota goals directory Add DAMON selftests for testing creation/existence of quota goals directories and files, and simple valid input writes. Link: https://lkml.kernel.org/r/20231130023652.50284-7-sj@kernel.org Signed-off-by: SeongJae Park Cc: Brendan Higgins Cc: David Gow Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/sysfs.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 56f0230a8b92..e9a976d296e2 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -150,6 +150,32 @@ test_weights() ensure_file "$weights_dir/age_permil" "exist" "600" } +test_goal() +{ + goal_dir=$1 + ensure_dir "$goal_dir" "exist" + ensure_file "$goal_dir/target_value" "exist" "600" + ensure_file "$goal_dir/current_value" "exist" "600" +} + +test_goals() +{ + goals_dir=$1 + ensure_dir "$goals_dir" "exist" + ensure_file "$goals_dir/nr_goals" "exist" "600" + + ensure_write_succ "$goals_dir/nr_goals" "1" "valid input" + test_goal "$goals_dir/0" + + ensure_write_succ "$goals_dir/nr_goals" "2" "valid input" + test_goal "$goals_dir/0" + test_goal "$goals_dir/1" + + ensure_write_succ "$goals_dir/nr_goals" "0" "valid input" + ensure_dir "$goals_dir/0" "not_exist" + ensure_dir "$goals_dir/1" "not_exist" +} + test_quotas() { quotas_dir=$1 @@ -158,6 +184,7 @@ test_quotas() ensure_file "$quotas_dir/bytes" "exist" 600 ensure_file "$quotas_dir/reset_interval_ms" "exist" 600 test_weights "$quotas_dir/weights" + test_goals "$quotas_dir/goals" } test_access_pattern() -- cgit v1.2.3 From e1ba7f64b192f083b4423644be03bb9e3dc8ae84 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 12 Dec 2023 18:29:11 +0000 Subject: selftests/bpf: Relax time_tai test for equal timestamps in tai_forward We're observing test flakiness on an arm64 platform which might not have timestamps as precise as x86. The test log looks like: test_time_tai:PASS:tai_open 0 nsec test_time_tai:PASS:test_run 0 nsec test_time_tai:PASS:tai_ts1 0 nsec test_time_tai:PASS:tai_ts2 0 nsec test_time_tai:FAIL:tai_forward unexpected tai_forward: actual 1702348135471494160 <= expected 1702348135471494160 test_time_tai:PASS:tai_gettime 0 nsec test_time_tai:PASS:tai_future_ts1 0 nsec test_time_tai:PASS:tai_future_ts2 0 nsec test_time_tai:PASS:tai_range_ts1 0 nsec test_time_tai:PASS:tai_range_ts2 0 nsec #199 time_tai:FAIL This patch changes ASSERT_GT to ASSERT_GE in the tai_forward assertion so that equal timestamps are permitted. Fixes: 64e15820b987 ("selftests/bpf: Add BPF-helper test for CLOCK_TAI access") Signed-off-by: YiFei Zhu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231212182911.3784108-1-zhuyifei@google.com --- tools/testing/selftests/bpf/prog_tests/time_tai.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c index a31119823666..f45af1b0ef2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/time_tai.c +++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c @@ -56,7 +56,7 @@ void test_time_tai(void) ASSERT_NEQ(ts2, 0, "tai_ts2"); /* TAI is moving forward only */ - ASSERT_GT(ts2, ts1, "tai_forward"); + ASSERT_GE(ts2, ts1, "tai_forward"); /* Check for future */ ret = clock_gettime(CLOCK_TAI, &now_tai); -- cgit v1.2.3 From a6fcd57cf2df409d35e9225b8dbad6f937b28df0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 6 Dec 2023 11:35:58 +0100 Subject: selftests/mm: cow: print ksft header before printing anything else Doing a ksft_print_msg() before the ksft_print_header() seems to confuse the ksft framework in a strange way: running the test on the cmdline results in the expected output. But piping the output somewhere else, results in some odd output, whereby we repeatedly get the same info printed: # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled TAP version 13 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled Doing the ksft_print_header() first seems to resolve that and gives us the output we expect: TAP version 13 # [INFO] detected THP size: 2048 KiB # [INFO] detected hugetlb page size: 2048 KiB # [INFO] detected hugetlb page size: 1048576 KiB # [INFO] huge zeropage is enabled 1..190 # [INFO] Anonymous memory tests in private mappings # [RUN] Basic COW after fork() ... with base page ok 1 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped out base page ok 2 No leak from parent into child # [RUN] Basic COW after fork() ... with THP ok 3 No leak from parent into child # [RUN] Basic COW after fork() ... with swapped-out THP ok 4 No leak from parent into child # [RUN] Basic COW after fork() ... with PTE-mapped THP ok 5 No leak from parent into child Link: https://lkml.kernel.org/r/20231206103558.38040-1-david@redhat.com Fixes: f4b5fd6946e2 ("selftests/vm: anon_cow: THP tests") Signed-off-by: David Hildenbrand Reported-by: Nico Pache Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 7324ce5363c0..6f2f83990441 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -1680,6 +1680,8 @@ int main(int argc, char **argv) { int err; + ksft_print_header(); + pagesize = getpagesize(); thpsize = read_pmd_pagesize(); if (thpsize) @@ -1689,7 +1691,6 @@ int main(int argc, char **argv) ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); - ksft_print_header(); ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case()); -- cgit v1.2.3 From 43e8832fed08438e2a27afed9bac21acd0ceffe5 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 8 Dec 2023 18:01:44 -0800 Subject: Revert "selftests: error out if kernel header files are not yet built" This reverts commit 9fc96c7c19df ("selftests: error out if kernel header files are not yet built"). It turns out that requiring the kernel headers to be built as a prerequisite to building selftests, does not work in many cases. For example, Peter Zijlstra writes: "My biggest beef with the whole thing is that I simply do not want to use 'make headers', it doesn't work for me. I have a ton of output directories and I don't care to build tools into the output dirs, in fact some of them flat out refuse to work that way (bpf comes to mind)." [1] Therefore, stop erroring out on the selftests build. Additional patches will be required in order to change over to not requiring the kernel headers. [1] https://lore.kernel.org/20231208221007.GO28727@noisy.programming.kicks-ass.net Link: https://lkml.kernel.org/r/20231209020144.244759-1-jhubbard@nvidia.com Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built") Signed-off-by: John Hubbard Cc: Anders Roxell Cc: Muhammad Usama Anjum Cc: David Hildenbrand Cc: Peter Xu Cc: Jonathan Corbet Cc: Nathan Chancellor Cc: Shuah Khan Cc: Peter Zijlstra Cc: Marcos Paulo de Souza Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/Makefile | 21 +-------------------- tools/testing/selftests/lib.mk | 40 +++------------------------------------- 2 files changed, 4 insertions(+), 57 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..8247a7c69c36 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -155,12 +155,10 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include - KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include - KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -174,7 +172,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: kernel_header_files +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -185,23 +183,6 @@ all: kernel_header_files ret=$$((ret * $$?)); \ done; exit $$ret; -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files - run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 118e0964bda9..aa646e0661f3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,26 +44,10 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ("$(origin O)", "command line") - KBUILD_OUTPUT := $(O) +ifeq ($(KHDR_INCLUDES),) +KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif -ifneq ($(KBUILD_OUTPUT),) - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot - # expand a shell special character '~'. We use a somewhat tedious way here. - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) - $(if $(abs_objtree),, \ - $(error failed to create output directory "$(KBUILD_OUTPUT)")) - # $(realpath ...) resolves symlinks - abs_objtree := $(realpath $(abs_objtree)) - KHDR_DIR := ${abs_objtree}/usr/include -else - abs_srctree := $(shell cd $(top_srcdir) && pwd) - KHDR_DIR := ${abs_srctree}/usr/include -endif - -KHDR_INCLUDES := -isystem $(KHDR_DIR) - # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ - $(TEST_GEN_FILES) - -kernel_header_files: - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ - if [ $$? -ne 0 ]; then \ - RED='\033[1;31m'; \ - NOCOLOR='\033[0m'; \ - echo; \ - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ - echo "Please run this and try again:"; \ - echo; \ - echo " cd $(top_srcdir)"; \ - echo " make headers"; \ - echo; \ - exit 1; \ - fi - -.PHONY: kernel_header_files +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) define RUN_TESTS BASE_DIR="$(selfdir)"; \ -- cgit v1.2.3 From 9dbd5927408c4a0707de73ae9dd9306b184e8fee Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Nov 2023 00:27:44 +1100 Subject: selftests/powerpc: Fix error handling in FPU/VMX preemption tests The FPU & VMX preemption tests do not check for errors returned by the low-level asm routines, preempt_fpu() / preempt_vsx() respectively. That means any register corruption detected by the asm routines does not result in a test failure. Fix it by returning the return value of the asm routines from the pthread child routines. Fixes: e5ab8be68e44 ("selftests/powerpc: Test preservation of FPU and VMX regs across preemption") Signed-off-by: Michael Ellerman Link: https://msgid.link/20231128132748.1990179-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/fpu_preempt.c | 9 +++++---- tools/testing/selftests/powerpc/math/vmx_preempt.c | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 5235bdc8c0b1..3e5b5663d244 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -37,19 +37,20 @@ __thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, int threads_starting; int running; -extern void preempt_fpu(double *darray, int *threads_starting, int *running); +extern int preempt_fpu(double *darray, int *threads_starting, int *running); void *preempt_fpu_c(void *p) { + long rc; int i; + srand(pthread_self()); for (i = 0; i < 21; i++) darray[i] = rand(); - /* Test failed if it ever returns */ - preempt_fpu(darray, &threads_starting, &running); + rc = preempt_fpu(darray, &threads_starting, &running); - return p; + return (void *)rc; } int test_preempt_fpu(void) diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c index 6761d6ce30ec..6f7cf400c687 100644 --- a/tools/testing/selftests/powerpc/math/vmx_preempt.c +++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c @@ -37,19 +37,21 @@ __thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12}, int threads_starting; int running; -extern void preempt_vmx(vector int *varray, int *threads_starting, int *running); +extern int preempt_vmx(vector int *varray, int *threads_starting, int *running); void *preempt_vmx_c(void *p) { int i, j; + long rc; + srand(pthread_self()); for (i = 0; i < 12; i++) for (j = 0; j < 4; j++) varray[i][j] = rand(); - /* Test fails if it ever returns */ - preempt_vmx(varray, &threads_starting, &running); - return p; + rc = preempt_vmx(varray, &threads_starting, &running); + + return (void *)rc; } int test_preempt_vmx(void) -- cgit v1.2.3 From e5d00aaac651a69b8016d355123438387bfd37be Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Nov 2023 00:27:45 +1100 Subject: selftests/powerpc: Check all FPRs in fpu_preempt There's a selftest that checks FPRs aren't corrupted by preemption, or just process scheduling. However it only checks the non-volatile FPRs, meaning corruption of the volatile FPRs could go undetected. The check_fpu function it calls is used by several other tests, so for now add a new routine to check all the FPRs. Increase the size of the array of FPRs to 32, and initialise them all with random values. Signed-off-by: Michael Ellerman Link: https://msgid.link/20231128132748.1990179-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/fpu_asm.S | 41 +++++++++++++++++++--- tools/testing/selftests/powerpc/math/fpu_preempt.c | 15 ++++---- 2 files changed, 43 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S index 9dc0c158f871..051392ad3ce7 100644 --- a/tools/testing/selftests/powerpc/math/fpu_asm.S +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -66,6 +66,40 @@ FUNC_START(check_fpu) li r3,0 # Success!!! 1: blr + +// int check_all_fprs(double darray[32]) +FUNC_START(check_all_fprs) + PUSH_BASIC_STACK(8) + mr r4, r3 // r4 = darray + li r3, 1 // prepare for failure + + stfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // backup f31 + + // Check regs f0-f30, using f31 as scratch + .set i, 0 + .rept 31 + lfd f31, (8 * i)(r4) // load expected value + fcmpu cr0, i, f31 // compare + bne cr0, 1f // bail if mismatch + .set i, i + 1 + .endr + + lfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // reload f31 + stfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // backup f30 + + lfd f30, (8 * 31)(r4) // load expected value of f31 + fcmpu cr0, f30, f31 // compare + bne cr0, 1f // bail if mismatch + + lfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // reload f30 + + // Success + li r3, 0 + +1: POP_BASIC_STACK(8) + blr +FUNC_END(check_all_fprs) + FUNC_START(test_fpu) # r3 holds pointer to where to put the result of fork # r4 holds pointer to the pid @@ -104,8 +138,8 @@ FUNC_START(preempt_fpu) std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting std r5,STACK_FRAME_PARAM(2)(sp) # int *running - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 sync # Atomic DEC @@ -116,8 +150,7 @@ FUNC_START(preempt_fpu) bne- 1b 2: ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu - nop + bl check_all_fprs cmpdi r3,0 bne 3f ld r4,STACK_FRAME_PARAM(2)(sp) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 3e5b5663d244..24b5abacccdc 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -1,13 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2015, Cyril Bur, IBM Corp. + * Copyright 2023, Michael Ellerman, IBM Corp. * * This test attempts to see if the FPU registers change across preemption. - * Two things should be noted here a) The check_fpu function in asm only checks - * the non volatile registers as it is reused from the syscall test b) There is - * no way to be sure preemption happened so this test just uses many threads - * and a long wait. As such, a successful test doesn't mean much but a failure - * is bad. + * There is no way to be sure preemption happened so this test just uses many + * threads and a long wait. As such, a successful test doesn't mean much but + * a failure is bad. */ #include @@ -30,9 +29,7 @@ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; int threads_starting; int running; @@ -45,7 +42,7 @@ void *preempt_fpu_c(void *p) int i; srand(pthread_self()); - for (i = 0; i < 21; i++) + for (i = 0; i < ARRAY_SIZE(darray); i++) darray[i] = rand(); rc = preempt_fpu(darray, &threads_starting, &running); -- cgit v1.2.3 From 2ba107f6795d780bb54b85040a9b2c6a60fccb15 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Nov 2023 00:27:46 +1100 Subject: selftests/powerpc: Generate better bit patterns for FPU tests The fpu_preempt test randomly initialises an array of doubles to try and detect FPU register corruption. However the values it generates do not occupy the full range of values possible in the 64-bit double, meaning some partial register corruption could go undetected. Without getting too carried away, add some better initialisation to generate values that occupy more bits. Sample values before: f0 902677510 (raw 0x41cae6e203000000) f1 325217596 (raw 0x41b3626d3c000000) f2 1856578300 (raw 0x41dbaa48bf000000) f3 1247189984 (raw 0x41d295a6f8000000) And after: f0 1.1078153481413311e-09 (raw 0x3e13083932805cc2) f1 1.0576648474801922e+17 (raw 0x43777c20eb88c261) f2 -6.6245033413594075e-10 (raw 0xbe06c2f989facae9) f3 3.0085988827156291e+18 (raw 0x43c4e0585f2df37b) Signed-off-by: Michael Ellerman Link: https://msgid.link/20231128132748.1990179-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/fpu.h | 25 ++++++++++++++++++++++ tools/testing/selftests/powerpc/math/fpu_preempt.c | 6 ++---- 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/powerpc/math/fpu.h (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/math/fpu.h b/tools/testing/selftests/powerpc/math/fpu.h new file mode 100644 index 000000000000..a8ad0d42604e --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2023, Michael Ellerman, IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_FPU_H +#define _SELFTESTS_POWERPC_FPU_H + +static inline void randomise_darray(double *darray, int num) +{ + long val; + + for (int i = 0; i < num; i++) { + val = random(); + if (val & 1) + val *= -1; + + if (val & 2) + darray[i] = 1.0 / val; + else + darray[i] = val * val; + } +} + +#endif /* _SELFTESTS_POWERPC_FPU_H */ diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 24b5abacccdc..97dff3690136 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -19,6 +19,7 @@ #include #include "utils.h" +#include "fpu.h" /* Time to wait for workers to get preempted (seconds) */ #define PREEMPT_TIME 20 @@ -39,12 +40,9 @@ extern int preempt_fpu(double *darray, int *threads_starting, int *running); void *preempt_fpu_c(void *p) { long rc; - int i; srand(pthread_self()); - for (i = 0; i < ARRAY_SIZE(darray); i++) - darray[i] = rand(); - + randomise_darray(darray, ARRAY_SIZE(darray)); rc = preempt_fpu(darray, &threads_starting, &running); return (void *)rc; -- cgit v1.2.3 From 60d2c3af9a0c04dc2d34a62e9f5e7033b40dfed8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Nov 2023 00:27:47 +1100 Subject: selftests/powerpc: Run fpu_preempt test for 60 seconds The FPU preempt test only runs for 20 seconds, which is not particularly long. Run it for 60 seconds to increase the chance of detecting corruption. Signed-off-by: Michael Ellerman Link: https://msgid.link/20231128132748.1990179-4-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/fpu_preempt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 97dff3690136..9ddede0770ed 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -22,7 +22,7 @@ #include "fpu.h" /* Time to wait for workers to get preempted (seconds) */ -#define PREEMPT_TIME 20 +#define PREEMPT_TIME 60 /* * Factor by which to multiply number of online CPUs for total number of * worker threads -- cgit v1.2.3 From 1bdf22580b794a498b17617bcd7ae417d6b78444 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Nov 2023 00:27:48 +1100 Subject: selftests/powerpc: Check all FPRs in fpu_syscall test There is a selftest that checks if FPRs are corrupted across a fork, aka clone. It was added as part of the series that optimised the clone path to save the parent's FP state without "giving up" (turning off FP). See commit 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up"). The test encodes the assumption that FPRs 0-13 are volatile across the syscall, by only checking the volatile FPRs are not changed by the fork. There was also a comment in the fpu_preempt test alluding to that: The check_fpu function in asm only checks the non volatile registers as it is reused from the syscall test It is true that the function call ABI treats f0-f13 as volatile, however the syscall ABI has since been documented as *not* treating those registers as volatile. See commit 7b8845a2a2ec ("powerpc/64: Document the syscall ABI"). So change the test to check all FPRs are not corrupted by the syscall. Note that this currently fails, because save_fpu() etc. do not restore f0/vsr0. Signed-off-by: Michael Ellerman Link: https://msgid.link/20231128132748.1990179-5-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/math/fpu_asm.S | 7 ++++--- tools/testing/selftests/powerpc/math/fpu_syscall.c | 8 +++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S index 051392ad3ce7..efe1e1be4695 100644 --- a/tools/testing/selftests/powerpc/math/fpu_asm.S +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -109,8 +109,9 @@ FUNC_START(test_fpu) std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 + li r0,__NR_fork sc @@ -119,7 +120,7 @@ FUNC_START(test_fpu) std r3,0(r9) ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu + bl check_all_fprs nop POP_FPU(256) diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c index 694f225c7e45..751d46b133fc 100644 --- a/tools/testing/selftests/powerpc/math/fpu_syscall.c +++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c @@ -14,12 +14,11 @@ #include #include "utils.h" +#include "fpu.h" extern int test_fpu(double *darray, pid_t *pid); -double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +double darray[32]; int syscall_fpu(void) { @@ -27,6 +26,9 @@ int syscall_fpu(void) int i; int ret; int child_ret; + + randomise_darray(darray, ARRAY_SIZE(darray)); + for (i = 0; i < 1000; i++) { /* test_fpu will fork() */ ret = test_fpu(darray, &fork_pid); -- cgit v1.2.3 From 3279f526952f82b2967663c36c12a01f125cbbfd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 7 Dec 2023 14:40:13 +0100 Subject: KVM: riscv: selftests: Fix get-reg-list print_reg defaults print_reg() will print everything it knows when it encounters a register ID it's unfamiliar with in the default cases of its decoding switches. Fix several issues with these (until now, never tested) paths; missing newlines in printfs, missing complement operator in mask, and missing return in order to avoid continuing to decode. Fixes: 62d0c458f828 ("KVM: riscv: selftests: get-reg-list print_reg should never fail") Signed-off-by: Andrew Jones Reviewed-by: Haibo Xu Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 6bedaea95395..25de4b8bc347 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -458,8 +458,9 @@ void print_reg(const char *prefix, __u64 id) reg_size = "KVM_REG_SIZE_U128"; break; default: - printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,", - (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & REG_MASK); + printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); + return; } switch (id & KVM_REG_RISCV_TYPE_MASK) { @@ -496,8 +497,9 @@ void print_reg(const char *prefix, __u64 id) reg_size, sbi_ext_id_to_str(prefix, id)); break; default: - printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,", - reg_size, id & REG_MASK); + printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n", + reg_size, id & ~REG_MASK); + return; } } -- cgit v1.2.3 From 9118c5d32bddb5f75bc4f9f31218e70317702502 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:01:59 -0600 Subject: powerpc/selftests: Add test for papr-vpd Add selftests for /dev/papr-vpd, exercising the common expected use cases: * Retrieve all VPD by passing an empty location code. * Retrieve the "system VPD" by passing a location code derived from DT root node properties, as done by the vpdupdate command. The tests also verify that certain intended properties of the driver hold: * Passing an unterminated location code to PAPR_VPD_CREATE_HANDLE gets EINVAL. * Passing a NULL location code pointer to PAPR_VPD_CREATE_HANDLE gets EFAULT. * Closing the device node without first issuing a PAPR_VPD_CREATE_HANDLE command to it succeeds. * Releasing a handle without first consuming any data from it succeeds. * Re-reading the contents of a handle returns the same data as the first time. Some minimal validation of the returned data is performed. The tests are skipped on systems where the papr-vpd driver does not initialize, making this useful only on PowerVM LPARs at this point. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-12-e9eafd0c8c6c@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 1 + .../testing/selftests/powerpc/papr_vpd/.gitignore | 1 + tools/testing/selftests/powerpc/papr_vpd/Makefile | 12 + .../testing/selftests/powerpc/papr_vpd/papr_vpd.c | 352 +++++++++++++++++++++ 4 files changed, 366 insertions(+) create mode 100644 tools/testing/selftests/powerpc/papr_vpd/.gitignore create mode 100644 tools/testing/selftests/powerpc/papr_vpd/Makefile create mode 100644 tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 7ea42fa02eab..05fc68d446c2 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -32,6 +32,7 @@ SUB_DIRS = alignment \ vphn \ math \ papr_attributes \ + papr_vpd \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore b/tools/testing/selftests/powerpc/papr_vpd/.gitignore new file mode 100644 index 000000000000..49285031a656 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore @@ -0,0 +1 @@ +/papr_vpd diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile new file mode 100644 index 000000000000..06b719703bfd --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_vpd + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c new file mode 100644 index 000000000000..98cbb9109ee6 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include + +#include "utils.h" + +#define DEVPATH "/dev/papr-vpd" + +static int dev_papr_vpd_open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int dev_papr_vpd_get_handle_all(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + off_t size; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +static int dev_papr_vpd_get_handle_byte_at_a_time(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size_t consumed = 0; + while (1) { + ssize_t res; + char c; + + errno = 0; + res = read(fd, &c, sizeof(c)); + FAIL_IF(res > sizeof(c)); + FAIL_IF(res < 0); + FAIL_IF(errno != 0); + consumed += res; + if (res == 0) + break; + } + + FAIL_IF(consumed != lseek(fd, 0, SEEK_END)); + + FAIL_IF(close(fd)); + + return 0; +} + + +static int dev_papr_vpd_unterm_loc_code(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = {}; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + /* + * Place a non-null byte in every element of loc_code; the + * driver should reject this input. + */ + memset(lc.str, 'x', ARRAY_SIZE(lc.str)); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(fd != -1); + FAIL_IF(errno != EINVAL); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int dev_papr_vpd_null_handle(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + int rc; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + rc = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, NULL); + FAIL_IF(rc != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_close_handle_without_reading(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + /* close the handle without reading it */ + FAIL_IF(close(fd) != 0); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_reread(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + const off_t size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + char *bufs[2]; + + for (size_t i = 0; i < ARRAY_SIZE(bufs); ++i) { + bufs[i] = malloc(size); + FAIL_IF(!bufs[i]); + ssize_t consumed = pread(fd, bufs[i], size, 0); + FAIL_IF(consumed != size); + } + + FAIL_IF(memcmp(bufs[0], bufs[1], size)); + + FAIL_IF(close(fd) != 0); + + return 0; +} + +static int get_system_loc_code(struct papr_location_code *lc) +{ + static const char system_id_path[] = "/sys/firmware/devicetree/base/system-id"; + static const char model_path[] = "/sys/firmware/devicetree/base/model"; + char *system_id; + char *model; + int err = -1; + + if (read_file_alloc(model_path, &model, NULL)) + return err; + + if (read_file_alloc(system_id_path, &system_id, NULL)) + goto free_model; + + char *mtm; + int sscanf_ret = sscanf(model, "IBM,%ms", &mtm); + if (sscanf_ret != 1) + goto free_system_id; + + char *plant_and_seq; + if (sscanf(system_id, "IBM,%*c%*c%ms", &plant_and_seq) != 1) + goto free_mtm; + /* + * Replace - with . to build location code. + */ + char *sep = strchr(mtm, '-'); + if (!sep) + goto free_mtm; + else + *sep = '.'; + + snprintf(lc->str, sizeof(lc->str), + "U%s.%s", mtm, plant_and_seq); + err = 0; + + free(plant_and_seq); +free_mtm: + free(mtm); +free_system_id: + free(system_id); +free_model: + free(model); + return err; +} + +static int papr_vpd_system_loc_code(void) +{ + struct papr_location_code lc; + const int devfd = open(DEVPATH, O_RDONLY); + off_t size; + int fd; + + SKIP_IF_MSG(get_system_loc_code(&lc), + "Cannot determine system location code"); + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +struct vpd_test { + int (*function)(void); + const char *description; +}; + +static const struct vpd_test vpd_tests[] = { + { + .function = dev_papr_vpd_open_close, + .description = "open/close " DEVPATH, + }, + { + .function = dev_papr_vpd_unterm_loc_code, + .description = "ensure EINVAL on unterminated location code", + }, + { + .function = dev_papr_vpd_null_handle, + .description = "ensure EFAULT on bad handle addr", + }, + { + .function = dev_papr_vpd_get_handle_all, + .description = "get handle for all VPD" + }, + { + .function = papr_vpd_close_handle_without_reading, + .description = "close handle without consuming VPD" + }, + { + .function = dev_papr_vpd_get_handle_byte_at_a_time, + .description = "read all VPD one byte at a time" + }, + { + .function = papr_vpd_reread, + .description = "ensure re-read yields same results" + }, + { + .function = papr_vpd_system_loc_code, + .description = "get handle for system VPD" + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(vpd_tests); ++i) { + const struct vpd_test *t = &vpd_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} -- cgit v1.2.3 From 76b2ec3faeaa0c8d84705acd64ac0e5a307ce9c2 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 12 Dec 2023 11:02:00 -0600 Subject: powerpc/selftests: Add test for papr-sysparm Consistently testing system parameter access is a bit difficult by nature -- the set of parameters available depends on the model and system configuration, and updating a parameter should be considered a destructive operation reserved for the admin. So we validate some of the error paths and retrieve the SPLPAR characteristics string, but not much else. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20231212-papr-sys_rtas-vs-lockdown-v6-13-e9eafd0c8c6c@linux.ibm.com --- tools/testing/selftests/powerpc/Makefile | 1 + .../selftests/powerpc/papr_sysparm/.gitignore | 1 + .../selftests/powerpc/papr_sysparm/Makefile | 12 ++ .../selftests/powerpc/papr_sysparm/papr_sysparm.c | 196 +++++++++++++++++++++ 4 files changed, 210 insertions(+) create mode 100644 tools/testing/selftests/powerpc/papr_sysparm/.gitignore create mode 100644 tools/testing/selftests/powerpc/papr_sysparm/Makefile create mode 100644 tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 05fc68d446c2..c376151982c4 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -33,6 +33,7 @@ SUB_DIRS = alignment \ math \ papr_attributes \ papr_vpd \ + papr_sysparm \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore new file mode 100644 index 000000000000..f2a69bf59d40 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore @@ -0,0 +1 @@ +/papr_sysparm diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile new file mode 100644 index 000000000000..7f79e437634a --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_sysparm + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c new file mode 100644 index 000000000000..d5436de5b8ed --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define DEVPATH "/dev/papr-sysparm" + +static int open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_splpar(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 20, // SPLPAR characteristics + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0); + FAIL_IF(sp.length == 0); + FAIL_IF(sp.length > sizeof(sp.data)); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_bad_parameter(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = UINT32_MAX, // there are only ~60 specified parameters + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1); + FAIL_IF(errno != EOPNOTSUPP); + + // Ensure the buffer is unchanged + FAIL_IF(sp.length != 0); + for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i) + FAIL_IF(sp.data[i] != 0); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_common(unsigned long cmd) +{ + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, cmd, NULL) != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_get(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_GET); +} + +static int check_efault_set(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_SET); +} + +static int set_hmc0(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter + }; + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + FAIL_IF(errno != EPERM); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int set_with_ro_fd(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter. + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + + // HMC0 isn't a settable parameter and we would normally + // expect to get EPERM on attempts to modify it. However, when + // the file is open read-only, we expect the driver to prevent + // the attempt with a distinct error. + FAIL_IF(errno != EBADF); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +struct sysparm_test { + int (*function)(void); + const char *description; +}; + +static const struct sysparm_test sysparm_tests[] = { + { + .function = open_close, + .description = "open and close " DEVPATH " without issuing commands", + }, + { + .function = get_splpar, + .description = "retrieve SPLPAR characteristics", + }, + { + .function = get_bad_parameter, + .description = "verify EOPNOTSUPP for known-bad parameter", + }, + { + .function = check_efault_get, + .description = "PAPR_SYSPARM_IOC_GET returns EFAULT on bad address", + }, + { + .function = check_efault_set, + .description = "PAPR_SYSPARM_IOC_SET returns EFAULT on bad address", + }, + { + .function = set_hmc0, + .description = "ensure EPERM on attempt to update HMC0", + }, + { + .function = set_with_ro_fd, + .description = "PAPR_IOC_SYSPARM_SET returns EACCESS on read-only fd", + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(sysparm_tests); ++i) { + const struct sysparm_test *t = &sysparm_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} -- cgit v1.2.3 From e39120ab8a04e5eb304cee3cfb42d628eb1f0d48 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Dec 2023 07:21:35 -0500 Subject: KVM: selftests: Fix dynamic generation of configuration names When we dynamically generate a name for a configuration in get-reg-list we use strcat() to append to a buffer allocated using malloc() but we never initialise that buffer. Since malloc() offers no guarantees regarding the contents of the memory it returns this can lead to us corrupting, and likely overflowing, the buffer: vregs: PASS vregs+pmu: PASS sve: PASS sve+pmu: PASS vregs+pauth_address+pauth_generic: PASS X?vr+gspauth_addre+spauth_generi+pmu: PASS The bug is that strcat() should have been strcpy(), and that replacement would be enough to fix it, but there are other things in the function that leave something to be desired. In particular, an (incorrectly) empty config would cause an out of bounds access to c->name[-1]. Since the strcpy() call relies on c->name[0..len-1] being initialized, enforce that invariant throughout the function. Fixes: 2f9ace5d4557 ("KVM: arm64: selftests: get-reg-list: Introduce vcpu configs") Reviewed-by: Andrew Jones Co-developed-by: Mark Brown Signed-off-by: Mark Brown Message-Id: <20231211-kvm-get-reg-list-str-init-v3-1-6554c71c77b1@kernel.org> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/get-reg-list.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index be7bf5224434..8274ef04301f 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -71,11 +71,12 @@ static const char *config_name(struct vcpu_reg_list *c) for_each_sublist(c, s) { if (!strcmp(s->name, "base")) continue; - strcat(c->name + len, s->name); - len += strlen(s->name) + 1; - c->name[len - 1] = '+'; + if (len) + c->name[len++] = '+'; + strcpy(c->name + len, s->name); + len += strlen(s->name); } - c->name[len - 1] = '\0'; + c->name[len] = '\0'; return c->name; } -- cgit v1.2.3 From 813900d19b923fc1b241c1ce292472f68066092b Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 7 Dec 2023 16:16:34 +0800 Subject: perf header: Fix one memory leakage in perf_event__fprintf_event_update() When dump the raw trace by `perf report -D` ASan reports a memory leakage in perf_event__fprintf_event_update(). It shows that we allocated a temporary cpumap for dumping the CPUs but doesn't release it and it's not used elsewhere. Fix this by free the cpumap after the dumping. Fixes: c853f9394b7bc189 ("perf tools: Add perf_event__fprintf_event_update function") Reviewed-by: Ian Rogers Signed-off-by: Yicong Yang Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Cameron Cc: Junhao He Cc: Mark Rutland Cc: Peter Zijlstra Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/20231207081635.8427-2-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 08cc2febabde..a9f71f8343f0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4391,9 +4391,10 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... "); map = cpu_map__new_data(&ev->cpus.cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: -- cgit v1.2.3 From 1bc479d665bc25a9a4e8168d5b400a47491511f9 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 7 Dec 2023 16:16:35 +0800 Subject: perf hisi-ptt: Fix one memory leakage in hisi_ptt_process_auxtrace_event() ASan complains a memory leakage in hisi_ptt_process_auxtrace_event() that the data buffer is not freed. Since currently we only support the raw dump trace mode, the data buffer is used only within this function. So fix this by freeing the data buffer before going out. Fixes: 5e91e57e68090c0e ("perf auxtrace arm64: Add support for parsing HiSilicon PCIe Trace packet") Reviewed-by: Ian Rogers Signed-off-by: Yicong Yang Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Cameron Cc: Junhao He Cc: Mark Rutland Cc: Peter Zijlstra Cc: Qi Liu Link: https://lore.kernel.org/r/20231207081635.8427-3-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hisi-ptt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 43bd1ca62d58..52d0ce302ca0 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -123,6 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, if (dump_trace) hisi_ptt_dump_event(ptt, data, size); + free(data); return 0; } -- cgit v1.2.3 From 62d9a969f4a95219c757831e9ad66cd4dd9edee5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 12 Dec 2023 14:53:43 -0800 Subject: selftests/bpf: fix compiler warnings in RELEASE=1 mode When compiling BPF selftests with RELEASE=1, we get two new warnings, which are treated as errors. Fix them. Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/r/20231212225343.1723081-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 2 +- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1d418d66e375..244d4996e06e 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1254,7 +1254,7 @@ static int cmp_join_stat(const struct verif_stats_join *s1, bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; - double v1, v2; + double v1 = 0.0, v2 = 0.0; int cmp = 0; fetch_join_stat_value(s1, id, var, &str1, &v1); diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3291625ba4fb..c69c08933fdd 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .flags = XSK_UMEM__DEFAULT_FLAGS, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; - __u32 idx; + __u32 idx = 0; u64 addr; int ret; int i; -- cgit v1.2.3 From 2f70803532e9b7f14897d17f8944d431755661a7 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:07 -0700 Subject: libbpf: Add BPF_CORE_WRITE_BITFIELD() macro === Motivation === Similar to reading from CO-RE bitfields, we need a CO-RE aware bitfield writing wrapper to make the verifier happy. Two alternatives to this approach are: 1. Use the upcoming `preserve_static_offset` [0] attribute to disable CO-RE on specific structs. 2. Use broader byte-sized writes to write to bitfields. (1) is a bit hard to use. It requires specific and not-very-obvious annotations to bpftool generated vmlinux.h. It's also not generally available in released LLVM versions yet. (2) makes the code quite hard to read and write. And especially if BPF_CORE_READ_BITFIELD() is already being used, it makes more sense to to have an inverse helper for writing. === Implementation details === Since the logic is a bit non-obvious, I thought it would be helpful to explain exactly what's going on. To start, it helps by explaining what LSHIFT_U64 (lshift) and RSHIFT_U64 (rshift) is designed to mean. Consider the core of the BPF_CORE_READ_BITFIELD() algorithm: val <<= __CORE_RELO(s, field, LSHIFT_U64); val = val >> __CORE_RELO(s, field, RSHIFT_U64); Basically what happens is we lshift to clear the non-relevant (blank) higher order bits. Then we rshift to bring the relevant bits (bitfield) down to LSB position (while also clearing blank lower order bits). To illustrate: Start: ........XXX...... Lshift: XXX......00000000 Rshift: 00000000000000XXX where `.` means blank bit, `0` means 0 bit, and `X` means bitfield bit. After the two operations, the bitfield is ready to be interpreted as a regular integer. Next, we want to build an alternative (but more helpful) mental model on lshift and rshift. That is, to consider: * rshift as the total number of blank bits in the u64 * lshift as number of blank bits left of the bitfield in the u64 Take a moment to consider why that is true by consulting the above diagram. With this insight, we can now define the following relationship: bitfield _ | | 0.....00XXX0...00 | | | | |______| | | lshift | | |____| (rshift - lshift) That is, we know the number of higher order blank bits is just lshift. And the number of lower order blank bits is (rshift - lshift). Finally, we can examine the core of the write side algorithm: mask = (~0ULL << rshift) >> lshift; // 1 val = (val & ~mask) | ((nval << rpad) & mask); // 2 1. Compute a mask where the set bits are the bitfield bits. The first left shift zeros out exactly the number of blank bits, leaving a bitfield sized set of 1s. The subsequent right shift inserts the correct amount of higher order blank bits. 2. On the left of the `|`, mask out the bitfield bits. This creates 0s where the new bitfield bits will go. On the right of the `|`, bring nval into the correct bit position and mask out any bits that fall outside of the bitfield. Finally, by bor'ing the two halves, we get the final set of bits to write back. [0]: https://reviews.llvm.org/D133361 Co-developed-by: Eduard Zingerman Signed-off-by: Eduard Zingerman Co-developed-by: Jonathan Lemon Signed-off-by: Jonathan Lemon Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/4d3dd215a4fd57d980733886f9c11a45e1a9adf3.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_core_read.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 1ac57bb7ac55..7325a12692a3 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -111,6 +111,38 @@ enum bpf_enum_value_kind { val; \ }) +/* + * Write to a bitfield, identified by s->field. + * This is the inverse of BPF_CORE_WRITE_BITFIELD(). + */ +#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \ + void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \ + unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \ + unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \ + unsigned long long mask, val, nval = new_val; \ + unsigned int rpad = rshift - lshift; \ + \ + asm volatile("" : "+r"(p)); \ + \ + switch (byte_size) { \ + case 1: val = *(unsigned char *)p; break; \ + case 2: val = *(unsigned short *)p; break; \ + case 4: val = *(unsigned int *)p; break; \ + case 8: val = *(unsigned long long *)p; break; \ + } \ + \ + mask = (~0ULL << rshift) >> lshift; \ + val = (val & ~mask) | ((nval << rpad) & mask); \ + \ + switch (byte_size) { \ + case 1: *(unsigned char *)p = val; break; \ + case 2: *(unsigned short *)p = val; break; \ + case 4: *(unsigned int *)p = val; break; \ + case 8: *(unsigned long long *)p = val; break; \ + } \ +}) + #define ___bpf_field_ref1(field) (field) #define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) #define ___bpf_field_ref(args...) \ -- cgit v1.2.3 From 7d19c00e9abc8ad3b3b72a1989331f45287e6bf5 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:08 -0700 Subject: bpf: selftests: test_loader: Support __btf_path() annotation This commit adds support for per-prog btf_custom_path. This is necessary for testing CO-RE relocations on non-vmlinux types using test_loader infrastructure. Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/660ea7f2fdbdd5103bc1af87c9fc931f05327926.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/progs/bpf_misc.h | 1 + tools/testing/selftests/bpf/test_loader.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 799fff4995d8..2fd59970c43a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -71,6 +71,7 @@ #define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) +#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index a350ecdfba4a..74ceb7877ae2 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -27,6 +27,7 @@ #define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv=" #define TEST_TAG_AUXILIARY "comment:test_auxiliary" #define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv" +#define TEST_BTF_PATH "comment:test_btf_path=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -58,6 +59,7 @@ struct test_spec { const char *prog_name; struct test_subspec priv; struct test_subspec unpriv; + const char *btf_custom_path; int log_level; int prog_flags; int mode_mask; @@ -288,6 +290,8 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; update_flags(&spec->prog_flags, flags, clear); } + } else if (str_has_pfx(s, TEST_BTF_PATH)) { + spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; } } @@ -578,6 +582,9 @@ void run_subtest(struct test_loader *tester, } } + /* Implicitly reset to NULL if next test case doesn't specify */ + open_opts->btf_custom_path = spec->btf_custom_path; + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ goto subtest_cleanup; -- cgit v1.2.3 From f04f2ce6018f3cb33ac96270b9153c2920ead190 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:09 -0700 Subject: bpf: selftests: Add verifier tests for CO-RE bitfield writes Add some tests that exercise BPF_CORE_WRITE_BITFIELD() macro. Since some non-trivial bit fiddling is going on, make sure various edge cases (such as adjacent bitfields and bitfields at the edge of structs) are exercised. Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/72698a1080fa565f541d5654705255984ea2a029.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_bitfield_write.c | 100 +++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_bitfield_write.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8d746642cbd7..ac49ec25211d 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -6,6 +6,7 @@ #include "verifier_and.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" +#include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" #include "verifier_bounds_deduction.skel.h" #include "verifier_bounds_deduction_non_const.skel.h" @@ -116,6 +117,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } +void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); } void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c new file mode 100644 index 000000000000..623f130a3198 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include +#include + +#include "bpf_misc.h" + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +} __attribute__((preserve_access_index)); + +SEC("tc") +__description("single CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(3) +int single_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + return BPF_CORE_READ_BITFIELD(&bitfields, ub2); +} + +SEC("tc") +__description("multiple CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x3FD) +int multiple_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub2; + int8_t sb4; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1); + + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4); + + return (((uint8_t)sb4) << 2) | ub2; +} + +SEC("tc") +__description("adjacent CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(7) +int adjacent_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub1, ub2; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + + return (ub2 << 1) | ub1; +} + +SEC("tc") +__description("multibyte CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x21) +int multibyte_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint32_t ub7; + uint8_t ub1; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7); + + return (ub7 << 1) | ub1; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From c6c5be3eee975ae640966844db66d404c1de79b1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:34 -0800 Subject: libbpf: split feature detectors definitions from cached results Split a list of supported feature detectors with their corresponding callbacks from actual cached supported/missing values. This will allow to have more flexible per-token or per-object feature detectors in subsequent refactorings. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac54ebc0629f..d2828a26b011 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4999,12 +4999,17 @@ enum kern_feature_result { FEAT_MISSING = 2, }; +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + typedef int (*feature_probe_fn)(void); +static struct kern_feature_cache feature_cache; + static struct kern_feature_desc { const char *desc; feature_probe_fn probe; - enum kern_feature_result res; } feature_probes[__FEAT_CNT] = { [FEAT_PROG_NAME] = { "BPF program name", probe_kern_prog_name, @@ -5072,6 +5077,7 @@ static struct kern_feature_desc { bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; + struct kern_feature_cache *cache = &feature_cache; int ret; if (obj && obj->gen_loader) @@ -5080,19 +5086,19 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) -- cgit v1.2.3 From 29c302a2e265a356434b005155990a9e766db75d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:35 -0800 Subject: libbpf: further decouple feature checking logic from bpf_object Add feat_supported() helper that accepts feature cache instead of bpf_object. This allows low-level code in bpf.c to not know or care about higher-level concept of bpf_object, yet it will be able to utilize custom feature checking in cases where BPF token might influence the outcome. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 6 +++--- tools/lib/bpf/libbpf.c | 22 +++++++++++++++------- tools/lib/bpf/libbpf_internal.h | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index f4e1da3c6d5f..120855ac6859 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -146,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -181,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -265,7 +265,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d2828a26b011..2b7962120730 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5074,17 +5074,14 @@ static struct kern_feature_desc { }, }; -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; - struct kern_feature_cache *cache = &feature_cache; int ret; - if (obj && obj->gen_loader) - /* To generate loader program assume the latest kernel - * to avoid doing extra prog_load, map_create syscalls. - */ - return true; + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); @@ -5101,6 +5098,17 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +{ + if (obj && obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. + */ + return true; + + return feat_supported(NULL, feat_id); +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5d..754a432335e4 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,8 +360,11 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +struct kern_feature_cache; +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_memcg_account(void); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); -- cgit v1.2.3 From ab8fc393b27cd2d6dd1ced1ba2358ddcd123fc15 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:36 -0800 Subject: libbpf: move feature detection code into its own file It's quite a lot of well isolated code, so it seems like a good candidate to move it out of libbpf.c to reduce its size. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/elf.c | 2 - tools/lib/bpf/features.c | 463 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 463 +--------------------------------------- tools/lib/bpf/libbpf_internal.h | 12 +- tools/lib/bpf/str_error.h | 3 + 6 files changed, 479 insertions(+), 466 deletions(-) create mode 100644 tools/lib/bpf/features.c (limited to 'tools') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..338fd0dcd3bd --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2b7962120730..4e6f8d225caa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4637,467 +4637,6 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { if (obj && obj->gen_loader) @@ -10626,7 +10165,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 754a432335e4..db4a499c0ec5 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,10 +360,20 @@ enum kern_feature_id { __FEAT_CNT, }; -struct kern_feature_cache; +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int probe_kern_syscall_wrapper(void); int probe_memcg_account(void); int bump_rlimit_memlock(void); diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b6..626d7ffb03d6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ -- cgit v1.2.3 From a75bb6a16518d4a224f24116633f3f9d5787f6d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:37 -0800 Subject: libbpf: wire up token_fd into feature probing logic Adjust feature probing callbacks to take into account optional token_fd. In unprivileged contexts, some feature detectors would fail to detect kernel support just because BPF program, BPF map, or BTF object can't be loaded due to privileged nature of those operations. So when BPF object is loaded with BPF token, this token should be used for feature probing. This patch is setting support for this scenario, but we don't yet pass non-zero token FD. This will be added in the next patch. We also switched BPF cookie detector from using kprobe program to tracepoint one, as tracepoint is somewhat less dangerous BPF program type and has higher likelihood of being allowed through BPF token in the future. This change has no effect on detection behavior. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 3 +- tools/lib/bpf/features.c | 91 ++++++++++++++++++++++++----------------- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 8 ++-- tools/lib/bpf/libbpf_probes.c | 8 ++-- 5 files changed, 66 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 120855ac6859..0ad8e532b3cf 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,7 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 338fd0dcd3bd..ce98a334be21 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -20,7 +20,7 @@ static int probe_fd(int fd) return fd >= 0; } -static int probe_kern_prog_name(void) +static int probe_kern_prog_name(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { @@ -35,6 +35,7 @@ static int probe_kern_prog_name(void) attr.license = ptr_to_u64("GPL"); attr.insns = ptr_to_u64(insns); attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ @@ -42,7 +43,7 @@ static int probe_kern_prog_name(void) return probe_fd(ret); } -static int probe_kern_global_data(void) +static int probe_kern_global_data(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { @@ -51,9 +52,11 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -64,12 +67,12 @@ static int probe_kern_global_data(void) insns[0].imm = map; - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); close(map); return probe_fd(ret); } -static int probe_kern_btf(void) +static int probe_kern_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -78,10 +81,10 @@ static int probe_kern_btf(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func(void) +static int probe_kern_btf_func(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -96,10 +99,10 @@ static int probe_kern_btf_func(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func_global(void) +static int probe_kern_btf_func_global(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -114,10 +117,10 @@ static int probe_kern_btf_func_global(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_datasec(void) +static int probe_kern_btf_datasec(int token_fd) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -133,10 +136,10 @@ static int probe_kern_btf_datasec(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_float(void) +static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; __u32 types[] = { @@ -145,10 +148,10 @@ static int probe_kern_btf_float(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_decl_tag(void) +static int probe_kern_btf_decl_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -162,10 +165,10 @@ static int probe_kern_btf_decl_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_type_tag(void) +static int probe_kern_btf_type_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -178,21 +181,27 @@ static int probe_kern_btf_type_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_array_mmap(void) +static int probe_kern_array_mmap(int token_fd) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE, + .token_fd = token_fd, + ); int fd; fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } -static int probe_kern_exp_attach_type(void) +static int probe_kern_exp_attach_type(int token_fd) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + ); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -208,8 +217,9 @@ static int probe_kern_exp_attach_type(void) return probe_fd(fd); } -static int probe_kern_probe_read_kernel(void) +static int probe_kern_probe_read_kernel(int token_fd) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -220,20 +230,22 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(fd); } -static int probe_prog_bind_map(void) +static int probe_prog_bind_map(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -242,7 +254,7 @@ static int probe_prog_bind_map(void) return ret; } - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); if (prog < 0) { close(map); return 0; @@ -256,7 +268,7 @@ static int probe_prog_bind_map(void) return ret >= 0; } -static int probe_module_btf(void) +static int probe_module_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -268,7 +280,7 @@ static int probe_module_btf(void) char name[16]; int fd, err; - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); if (fd < 0) return 0; /* BTF not supported at all */ @@ -285,16 +297,17 @@ static int probe_module_btf(void) return !err; } -static int probe_perf_link(void) +static int probe_perf_link(int token_fd) { struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); int prog_fd, link_fd, err; prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); + insns, ARRAY_SIZE(insns), &opts); if (prog_fd < 0) return -errno; @@ -311,10 +324,11 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_uprobe_multi_link(void) +static int probe_uprobe_multi_link(int token_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_opts, .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, ); LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_insn insns[] = { @@ -344,19 +358,20 @@ static int probe_uprobe_multi_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_kern_bpf_cookie(void) +static int probe_kern_bpf_cookie(int token_fd) { struct bpf_insn insns[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); int ret, insn_cnt = ARRAY_SIZE(insns); - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(ret); } -static int probe_kern_btf_enum64(void) +static int probe_kern_btf_enum64(int token_fd) { static const char strs[] = "\0enum64"; __u32 types[] = { @@ -364,10 +379,10 @@ static int probe_kern_btf_enum64(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -typedef int (*feature_probe_fn)(void); +typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -448,7 +463,7 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); + ret = feat->probe(cache->token_fd); if (ret > 0) { WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4e6f8d225caa..1acd1c224c5e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10165,7 +10165,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index db4a499c0ec5..b45566e428d7 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -368,19 +368,21 @@ enum kern_feature_result { struct kern_feature_cache { enum kern_feature_result res[__FEAT_CNT]; + int token_fd; }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); -int probe_kern_syscall_wrapper(void); -int probe_memcg_account(void); +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); + const char *str_sec, size_t str_len, + int token_fd); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..8e7437006639 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +273,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) -- cgit v1.2.3 From 1d0dd6ea2e38c18e1b31a8c3c59b6bdfe4f4efde Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:38 -0800 Subject: libbpf: wire up BPF token support at BPF object level Add BPF token support to BPF object-level functionality. BPF token is supported by BPF object logic either as an explicitly provided BPF token from outside (through BPF FS path or explicit BPF token FD), or implicitly (unless prevented through bpf_object_open_opts). Implicit mode is assumed to be the most common one for user namespaced unprivileged workloads. The assumption is that privileged container manager sets up default BPF FS mount point at /sys/fs/bpf with BPF token delegation options (delegate_{cmds,maps,progs,attachs} mount options). BPF object during loading will attempt to create BPF token from /sys/fs/bpf location, and pass it for all relevant operations (currently, map creation, BTF load, and program load). In this implicit mode, if BPF token creation fails due to whatever reason (BPF FS is not mounted, or kernel doesn't support BPF token, etc), this is not considered an error. BPF object loading sequence will proceed with no BPF token. In explicit BPF token mode, user provides explicitly either custom BPF FS mount point path or creates BPF token on their own and just passes token FD directly. In such case, BPF object will either dup() token FD (to not require caller to hold onto it for entire duration of BPF object lifetime) or will attempt to create BPF token from provided BPF FS location. If BPF token creation fails, that is considered a critical error and BPF object load fails with an error. Libbpf provides a way to disable implicit BPF token creation, if it causes any troubles (BPF token is designed to be completely optional and shouldn't cause any problems even if provided, but in the world of BPF LSM, custom security logic can be installed that might change outcome dependin on the presence of BPF token). To disable libbpf's default BPF token creation behavior user should provide either invalid BPF token FD (negative), or empty bpf_token_path option. BPF token presence can influence libbpf's feature probing, so if BPF object has associated BPF token, feature probing is instructed to use BPF object-specific feature detection cache and token FD. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 7 ++- tools/lib/bpf/libbpf.c | 118 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 28 +++++++++- tools/lib/bpf/libbpf_internal.h | 17 +++++- 4 files changed, 158 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..63033c334320 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,7 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1397,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1acd1c224c5e..db94bbe163e3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -693,6 +695,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3279,7 +3285,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4602,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is already set up */ + if (obj->token_fd > 0) + return 0; + /* token is explicitly prevented */ + if (obj->token_fd < 0) { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + /* reset to zero to avoid extra checks during map_create and prog_load steps */ + obj->token_fd = 0; + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4611,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4620,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4645,6 +4709,9 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); + return feat_supported(NULL, feat_id); } @@ -4764,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -6599,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7060,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; - int err; + int err, token_fd; char *log_buf; size_t log_size; __u32 log_level; @@ -7097,6 +7166,22 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + token_fd = OPTS_GET(opts, bpf_token_fd, -1); + /* non-empty token path can't be combined with invalid token FD */ + if (token_path && token_path[0] != '\0' && token_fd < 0) + return ERR_PTR(-EINVAL); + if (token_path && token_path[0] == '\0') { + /* empty token path can't be combined with valid token FD */ + if (token_fd > 0) + return ERR_PTR(-EINVAL); + /* empty token_path is equivalent to invalid token_fd */ + token_path = NULL; + token_fd = -1; + } + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7105,6 +7190,19 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); + if (token_fd > 0 && obj->token_fd < 0) { + err = -errno; + goto out; + } + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7615,7 +7713,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); @@ -8152,6 +8251,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..d3de39b537f3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,36 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* FD of a BPF token instantiated by user through bpf_token_create() + * API. BPF object will keep dup()'ed FD internally, so passed token + * FD can be closed after BPF object/skeleton open step. + * + * Setting bpf_token_fd to negative value disables libbpf's automatic + * attempt to create BPF token from default BPF FS mount point + * (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. + */ + int bpf_token_fd; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b45566e428d7..4cda32298c49 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -383,7 +383,9 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -547,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -558,7 +571,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; -- cgit v1.2.3 From 98e0eaa36adfb580a3aa43fca62847ec0f625d3f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:39 -0800 Subject: selftests/bpf: add BPF object loading tests with explicit token passing Add a few tests that attempt to load BPF object containing privileged map, program, and the one requiring mandatory BTF uploading into the kernel (to validate token FD propagation to BPF_BTF_LOAD command). Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 159 +++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/priv_map.c | 13 ++ tools/testing/selftests/bpf/progs/priv_prog.c | 13 ++ 3 files changed, 185 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/priv_map.c create mode 100644 tools/testing/selftests/bpf/progs/priv_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index dc03790c6272..9812292336c9 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -14,6 +14,9 @@ #include #include #include +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -643,6 +646,123 @@ cleanup: return err; } +static int userns_obj_priv_map(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err, token_fd; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + /* create token and pass it through bpf_token_fd */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "create_token")) + return -EINVAL; + + opts.bpf_token_path = NULL; + opts.bpf_token_fd = token_fd; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) + return -EINVAL; + + /* we can close our token FD, bpf_object owns dup()'ed FD now */ + close(token_fd); + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define bit(n) (1ULL << (n)) + void test_token(void) { if (test__start_subtest("map_token")) { @@ -669,4 +789,43 @@ void test_token(void) subtest_userns(&opts, userns_prog_load); } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if (test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + } + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } } diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 000000000000..9085be50f03b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 000000000000..3c7b2b618c8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +} -- cgit v1.2.3 From 18678cf0ee13cf19bac4ecd55665e6d1d63108b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:40 -0800 Subject: selftests/bpf: add tests for BPF object load with implicit token Add a test to validate libbpf's implicit BPF token creation from default BPF FS location (/sys/fs/bpf). Also validate that disabling this implicit BPF token creation works. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 76 ++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 9812292336c9..1a3c3aacf537 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "priv_map.skel.h" @@ -45,6 +46,13 @@ static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) return syscall(__NR_fsmount, fs_fd, flags, ms_flags); } +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + static int drop_priv_caps(__u64 *old_caps) { return cap_disable_effective((1ULL << CAP_BPF) | @@ -761,6 +769,63 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static int userns_obj_priv_implicit_token(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from /sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + /* now disable implicit token through negative bpf_token_fd, should fail */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + return -EINVAL; + + return 0; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -828,4 +893,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_btf_success); } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } } -- cgit v1.2.3 From ed54124b88056fd629c6af71664dfcd4d3b3e0b8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:41 -0800 Subject: libbpf: support BPF token path setting through LIBBPF_BPF_TOKEN_PATH envvar To allow external admin authority to override default BPF FS location (/sys/fs/bpf) for implicit BPF token creation, teach libbpf to recognize LIBBPF_BPF_TOKEN_PATH envvar. If it is specified and user application didn't explicitly specify neither bpf_token_path nor bpf_token_fd option, it will be treated exactly like bpf_token_path option, overriding default /sys/fs/bpf location and making BPF token mandatory. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 14 ++++++++++---- tools/lib/bpf/libbpf.h | 13 +++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db94bbe163e3..4b5ff9508e18 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7171,11 +7171,17 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, /* non-empty token path can't be combined with invalid token FD */ if (token_path && token_path[0] != '\0' && token_fd < 0) return ERR_PTR(-EINVAL); + /* empty token path can't be combined with valid token FD */ + if (token_path && token_path[0] == '\0' && token_fd > 0) + return ERR_PTR(-EINVAL); + /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, + * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as + * bpf_token_path option + */ + if (token_fd == 0 && !token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + /* empty token_path is equivalent to invalid token_fd */ if (token_path && token_path[0] == '\0') { - /* empty token path can't be combined with valid token FD */ - if (token_fd > 0) - return ERR_PTR(-EINVAL); - /* empty token_path is equivalent to invalid token_fd */ token_path = NULL; token_fd = -1; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d3de39b537f3..916904bd2a7a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -185,8 +185,16 @@ struct bpf_object_open_opts { * attempt to create BPF token from default BPF FS mount point * (/sys/fs/bpf), in case this default behavior is undesirable. * + * If bpf_token_path and bpf_token_fd are not specified, libbpf will + * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will + * be taken as a value of bpf_token_path option and will force libbpf + * to either create BPF token from provided custom BPF FS path, or + * will disable implicit BPF token creation, if envvar value is an + * empty string. + * * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. */ int bpf_token_fd; /* Path to BPF FS mount point to derive BPF token from. @@ -200,7 +208,8 @@ struct bpf_object_open_opts { * point (/sys/fs/bpf), in case this default behavior is undesirable. * * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. */ const char *bpf_token_path; -- cgit v1.2.3 From 322122bf8c75b1df78d6608516807a0354f6ab3c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:42 -0800 Subject: selftests/bpf: add tests for LIBBPF_BPF_TOKEN_PATH envvar Add new subtest validating LIBBPF_BPF_TOKEN_PATH envvar semantics. Extend existing test to validate that LIBBPF_BPF_TOKEN_PATH allows to disable implicit BPF token creation by setting envvar to empty string. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 112 +++++++++++++++++++++++++ 1 file changed, 112 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 1a3c3aacf537..548aeb91ab0d 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -769,6 +769,9 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + static int userns_obj_priv_implicit_token(int mnt_fd) { LIBBPF_OPTS(bpf_object_open_opts, opts); @@ -791,6 +794,20 @@ static int userns_obj_priv_implicit_token(int mnt_fd) if (!ASSERT_OK(err, "move_mount_bpffs")) return -EINVAL; + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + /* now the same struct_ops skeleton should succeed thanks to libppf * creating BPF token from /sys/fs/bpf mount point */ @@ -826,6 +843,90 @@ static int userns_obj_priv_implicit_token(int mnt_fd) return 0; } +static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from custom mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + /* now disable implicit token through negative bpf_token_fd, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -904,4 +1005,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token); } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } } -- cgit v1.2.3 From e6795330f88b4f643c649a02662d47b779340535 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:38 +0100 Subject: xdp: Add VLAN tag hint Implement functionality that enables drivers to expose VLAN tag to XDP code. VLAN tag is represented by 2 variables: - protocol ID, which is passed to bpf code in BE - VLAN TCI, in host byte order Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20231205210847.28460-10-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 4 ++++ Documentation/networking/xdp-rx-metadata.rst | 8 ++++++- include/net/xdp.h | 6 +++++ include/uapi/linux/netdev.h | 3 +++ net/core/xdp.c | 33 ++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 3 +++ tools/net/ynl/generated/netdev-user.c | 1 + 7 files changed, 57 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..aeec090e1387 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -54,6 +54,10 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + name: vlan-tag + doc: + Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). - type: flags name: xsk-flags diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index e3e9420fd817..a6e0ece18be5 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -20,7 +20,13 @@ Currently, the following kfuncs are supported. In the future, as more metadata is supported, this set will grow: .. kernel-doc:: net/core/xdp.c - :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash + :identifiers: bpf_xdp_metadata_rx_timestamp + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_hash + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_vlan_tag An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other diff --git a/include/net/xdp.h b/include/net/xdp.h index b7d6fe61381f..8cd04a74dba5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -404,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ enum xdp_rx_metadata { #define XDP_METADATA_KFUNC(name, _, __, ___) name, @@ -465,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/net/core/xdp.c b/net/core/xdp.c index b6f1d6dab3f2..4869c1c2d8f3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } +/** + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag + * @ctx: XDP context pointer. + * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID). + * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP) + * + * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*, + * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use + * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)** + * and should be used as follows: + * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();`` + * + * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag. + * Driver is expected to provide those in **host byte order (usually LE)**, + * so the bpf program should not perform byte conversion. + * According to 802.1Q standard, *VLAN TCI (Tag control information)* + * is a bit field that contains: + * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``, + * *Drop eligible indicator (DEI)* - 1 bit, + * *Priority code point (PCP)* - 3 bits. + * For detailed meaning of DEI and PCP, please refer to other sources. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : VLAN tag was not stripped or is not available + */ +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, u16 *vlan_tci) +{ + return -EOPNOTSUPP; +} + __bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..e3fe748086bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -53,6 +53,7 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value) static const char * const netdev_xdp_rx_metadata_strmap[] = { [0] = "timestamp", [1] = "hash", + [2] = "vlan-tag", }; const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) -- cgit v1.2.3 From e71a9fa7fdb2effcaaed37c207ec4f634c8f4901 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:44 +0100 Subject: selftests/bpf: Allow VLAN packets in xdp_hw_metadata Make VLAN c-tag and s-tag XDP hint testing more convenient by not skipping VLAN-ed packets. Allow both 802.1ad and 802.1Q headers. Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-16-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_hw_metadata.c | 10 +++++++++- tools/testing/selftests/bpf/xdp_metadata.h | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index f6d1cc9ad892..8767d919c881 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -26,15 +26,23 @@ int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; struct ipv6hdr *ip6h = NULL; - struct ethhdr *eth = NULL; struct udphdr *udp = NULL; struct iphdr *iph = NULL; struct xdp_meta *meta; + struct ethhdr *eth; int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; eth = data; + + if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) || + eth->h_proto == bpf_htons(ETH_P_8021Q))) + eth = (void *)eth + sizeof(struct vlan_hdr); + + if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q)) + eth = (void *)eth + sizeof(struct vlan_hdr); + if (eth + 1 < data_end) { if (eth->h_proto == bpf_htons(ETH_P_IP)) { iph = (void *)(eth + 1); diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 938a729bd307..6664893c2c77 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -9,6 +9,14 @@ #define ETH_P_IPV6 0x86DD #endif +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef ETH_P_8021AD +#define ETH_P_8021AD 0x88A8 +#endif + struct xdp_meta { __u64 rx_timestamp; __u64 xdp_timestamp; -- cgit v1.2.3 From 8e68a4beba943bdffb342c601c649223f44b7329 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:45 +0100 Subject: selftests/bpf: Add flags and VLAN hint to xdp_hw_metadata Add VLAN hint to the xdp_hw_metadata program. Also, to make metadata layout more straightforward, add flags field to pass information about validity of every separate hint separately. Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-17-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/xdp_hw_metadata.c | 28 ++++++++++++++---- tools/testing/selftests/bpf/xdp_hw_metadata.c | 34 ++++++++++++++++++---- tools/testing/selftests/bpf/xdp_metadata.h | 26 ++++++++++++++++- 3 files changed, 76 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index 8767d919c881..330ece2eabdb 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -20,6 +20,9 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp.frags") int rx(struct xdp_md *ctx) @@ -84,15 +87,28 @@ int rx(struct xdp_md *ctx) return XDP_PASS; } + meta->hint_valid = 0; + + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (!err) - meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + if (err) + meta->rx_timestamp_err = err; + else + meta->hint_valid |= XDP_META_FIELD_TS; + + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, + &meta->rx_hash_type); + if (err) + meta->rx_hash_err = err; else - meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + meta->hint_valid |= XDP_META_FIELD_RSS; - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); - if (err < 0) - meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ + err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); + if (err) + meta->rx_vlan_tag_err = err; + else + meta->hint_valid |= XDP_META_FIELD_VLAN_TAG; __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index c69c08933fdd..878d68db0325 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -21,6 +21,9 @@ #include "xsk.h" #include +#include +#include +#include #include #include #include @@ -182,19 +185,31 @@ static void print_tstamp_delta(const char *name, const char *refname, (double)delta / 1000); } +#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */ +#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */ +#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ +static void print_vlan_tci(__u16 tag) +{ + __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); + __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); + bool dei = FIELD_GET(VLAN_DEI_MASK, tag); + + printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - if (meta->rx_hash_err < 0) - printf("No rx_hash err=%d\n", meta->rx_hash_err); - else + if (meta->hint_valid & XDP_META_FIELD_RSS) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + else + printf("No rx_hash, err=%d\n", meta->rx_hash_err); - if (meta->rx_timestamp) { + if (meta->hint_valid & XDP_META_FIELD_TS) { __u64 ref_tstamp = gettime(clock_id); /* store received timestamps to calculate a delta at tx */ @@ -206,7 +221,16 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) print_tstamp_delta("XDP RX-time", "User RX-time", meta->xdp_timestamp, ref_tstamp); } else { - printf("No rx_timestamp\n"); + printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); + } + + if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { + printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); + printf("rx_vlan_tci: "); + print_vlan_tci(meta->rx_vlan_tci); + } else { + printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", + meta->rx_vlan_tag_err); } } diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 6664893c2c77..87318ad1117a 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -17,12 +17,36 @@ #define ETH_P_8021AD 0x88A8 #endif +#ifndef BIT +#define BIT(nr) (1 << (nr)) +#endif + +/* Non-existent checksum status */ +#define XDP_CHECKSUM_MAGIC BIT(2) + +enum xdp_meta_field { + XDP_META_FIELD_TS = BIT(0), + XDP_META_FIELD_RSS = BIT(1), + XDP_META_FIELD_VLAN_TAG = BIT(2), +}; + struct xdp_meta { - __u64 rx_timestamp; + union { + __u64 rx_timestamp; + __s32 rx_timestamp_err; + }; __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; __s32 rx_hash_err; }; + union { + struct { + __be16 rx_vlan_proto; + __u16 rx_vlan_tci; + }; + __s32 rx_vlan_tag_err; + }; + enum xdp_meta_field hint_valid; }; -- cgit v1.2.3 From a3850af4ea25dadc8b35edf132340907d523657e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:46 +0100 Subject: selftests/bpf: Add AF_INET packet generation to xdp_metadata The easiest way to simulate stripped VLAN tag in veth is to send a packet from VLAN interface, attached to veth. Unfortunately, this approach is incompatible with AF_XDP on TX side, because VLAN interfaces do not have such feature. Check both packets sent via AF_XDP TX and regular socket. AF_INET packet will also have a filled-in hash type (XDP_RSS_TYPE_L4), unlike AF_XDP packet, so more values can be checked. Signed-off-by: Larysa Zaremba Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231205210847.28460-18-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/xdp_metadata.c | 116 +++++++++++++++++---- 1 file changed, 97 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 33cdf88efa6b..e7f06cbdd845 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -20,7 +20,7 @@ #define UDP_PAYLOAD_BYTES 4 -#define AF_XDP_SOURCE_PORT 1234 +#define UDP_SOURCE_PORT 1234 #define AF_XDP_CONSUMER_PORT 8080 #define UMEM_NUM 16 @@ -33,6 +33,12 @@ #define RX_ADDR "10.0.0.2" #define PREFIX_LEN "8" #define FAMILY AF_INET +#define TX_NETNS_NAME "xdp_metadata_tx" +#define RX_NETNS_NAME "xdp_metadata_rx" +#define TX_MAC "00:00:00:00:00:01" +#define RX_MAC "00:00:00:00:00:02" + +#define XDP_RSS_TYPE_L4 BIT(3) struct xsk { void *umem_area; @@ -181,7 +187,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); ip_csum(iph); - udph->source = htons(AF_XDP_SOURCE_PORT); + udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, @@ -204,6 +210,30 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return 0; } +static int generate_packet_inet(void) +{ + char udp_payload[UDP_PAYLOAD_BYTES]; + struct sockaddr_in rx_addr; + int sock_fd, err = 0; + + /* Build a packet */ + memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); + rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); + rx_addr.sin_family = AF_INET; + rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT); + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) + return sock_fd; + + err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, + (void *)&rx_addr, sizeof(rx_addr)); + ASSERT_GE(err, 0, "sendto"); + + close(sock_fd); + return err; +} + static void complete_tx(struct xsk *xsk) { struct xsk_tx_metadata *meta; @@ -236,7 +266,7 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static int verify_xsk_metadata(struct xsk *xsk) +static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) { const struct xdp_desc *rx_desc; struct pollfd fds = {}; @@ -290,17 +320,36 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; + if (!sent_from_af_xdp) { + if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) + return -1; + goto done; + } + ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); /* checksum offload */ ASSERT_EQ(udph->check, htons(0x721c), "csum"); +done: xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); return 0; } +static void switch_ns_to_rx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(RX_NETNS_NAME); +} + +static void switch_ns_to_tx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(TX_NETNS_NAME); +} + void test_xdp_metadata(void) { struct xdp_metadata2 *bpf_obj2 = NULL; @@ -318,27 +367,31 @@ void test_xdp_metadata(void) int sock_fd; int ret; - /* Setup new networking namespace, with a veth pair. */ + /* Setup new networking namespaces, with a veth pair. */ + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); - SYS(out, "ip netns add xdp_metadata"); - tok = open_netns("xdp_metadata"); + tok = open_netns(TX_NETNS_NAME); SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); + + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); - SYS(out, "ip link set dev " RX_NAME " up"); SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + + /* Avoid ARP calls */ + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME); + + switch_ns_to_rx(&tok); + + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); + SYS(out, "ip link set dev " RX_NAME " up"); SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); - tx_ifindex = if_nametoindex(TX_NAME); - /* Setup separate AF_XDP for TX and RX interfaces. */ - - ret = open_xsk(tx_ifindex, &tx_xsk); - if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) - goto out; + /* Setup separate AF_XDP for RX interface. */ ret = open_xsk(rx_ifindex, &rx_xsk); if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) @@ -379,18 +432,38 @@ void test_xdp_metadata(void) if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) goto out; - /* Send packet destined to RX AF_XDP socket. */ + switch_ns_to_tx(&tok); + + /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */ + tx_ifindex = if_nametoindex(TX_NAME); + ret = open_xsk(tx_ifindex, &tx_xsk); + if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) + goto out; + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate AF_XDP_CONSUMER_PORT")) goto out; - /* Verify AF_XDP RX packet has proper metadata. */ - if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, + switch_ns_to_rx(&tok); + + /* Verify packet sent from AF_XDP has proper metadata. */ + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, "verify_xsk_metadata")) goto out; + switch_ns_to_tx(&tok); complete_tx(&tx_xsk); + /* Now check metadata of packet, generated with network stack */ + if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet")) + goto out; + + switch_ns_to_rx(&tok); + + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, + "verify_xsk_metadata")) + goto out; + /* Make sure freplace correctly picks up original bound device * and doesn't crash. */ @@ -408,11 +481,15 @@ void test_xdp_metadata(void) if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) goto out; + switch_ns_to_tx(&tok); + /* Send packet to trigger . */ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate freplace packet")) goto out; + switch_ns_to_rx(&tok); + while (!retries--) { if (bpf_obj2->bss->called) break; @@ -427,5 +504,6 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - SYS_NOFAIL("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); } -- cgit v1.2.3 From 4c6612f6100c2d85212865dbd1a5d8a7e391d3cb Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:47 +0100 Subject: selftests/bpf: Check VLAN tag and proto in xdp_metadata Verify, whether VLAN tag and proto are set correctly. To simulate "stripped" VLAN tag on veth, send test packet from VLAN interface. Also, add TO_STR() macro for convenience. Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-19-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/xdp_metadata.c | 20 ++++++++++++++++++-- tools/testing/selftests/bpf/progs/xdp_metadata.c | 5 +++++ tools/testing/selftests/bpf/testing_helpers.h | 3 +++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index e7f06cbdd845..05edcf32f528 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -38,7 +38,13 @@ #define TX_MAC "00:00:00:00:00:01" #define RX_MAC "00:00:00:00:00:02" +#define VLAN_ID 59 +#define VLAN_PROTO "802.1Q" +#define VLAN_PID htons(ETH_P_8021Q) +#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID) + #define XDP_RSS_TYPE_L4 BIT(3) +#define VLAN_VID_MASK 0xfff struct xsk { void *umem_area; @@ -323,6 +329,12 @@ static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) if (!sent_from_af_xdp) { if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) return -1; + + if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) + return -1; goto done; } @@ -378,10 +390,14 @@ void test_xdp_metadata(void) SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); - SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + + SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN + " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID)); + SYS(out, "ip link set dev " TX_NAME_VLAN " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); /* Avoid ARP calls */ - SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME); + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); switch_ns_to_rx(&tok); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index 5d6c1245c310..31ca229bb3c0 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -23,6 +23,9 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) @@ -86,6 +89,8 @@ int rx(struct xdp_md *ctx) meta->rx_timestamp = 1; bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 5b7a55136741..35284faff4f2 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -9,6 +9,9 @@ #include #include +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) + int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); int bpf_prog_test_load(const char *file, enum bpf_prog_type type, -- cgit v1.2.3 From 50d96f05af6787a34b4eca2ee3fc1993289c4c24 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 1 Dec 2023 10:01:39 -0800 Subject: bpf: sockmap, test for unconnected af_unix sock Add test to sockmap_basic to ensure af_unix sockets that are not connected can not be added to the map. Ensure we keep DGRAM sockets working however as these will not be connected typically. Signed-off-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231201180139.328529-3-john.fastabend@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/sockmap_basic.c | 34 ++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index f75f84d0b3d7..7c2241fae19a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -524,6 +524,37 @@ out: test_sockmap_pass_prog__destroy(pass); } +static void test_sockmap_unconnected_unix(void) +{ + int err, map, stream = 0, dgram = 0, zero = 0; + struct test_sockmap_pass_prog *skel; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + stream = xsocket(AF_UNIX, SOCK_STREAM, 0); + if (stream < 0) + return; + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + close(stream); + return; + } + + err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY); + ASSERT_ERR(err, "bpf_map_update_elem(stream)"); + + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + + close(stream); + close(dgram); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -566,4 +597,7 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); + + if (test__start_subtest("sockmap unconnected af_unix")) + test_sockmap_unconnected_unix(); } -- cgit v1.2.3 From 2e1d6a04116c373fbd25beddba4267178535bc60 Mon Sep 17 00:00:00 2001 From: Tushar Vyavahare Date: Thu, 14 Dec 2023 13:00:07 +0000 Subject: selftests/xsk: Fix for SEND_RECEIVE_UNALIGNED test Fix test broken by shared umem test and framework enhancement commit. Correct the current implementation of pkt_stream_replace_half() by ensuring that nb_valid_entries are not set to half, as this is not true for all the tests. Ensure that the expected value for valid_entries for the SEND_RECEIVE_UNALIGNED test equals the total number of packets sent, which is 4096. Create a new function called pkt_stream_pkt_set() that allows for packet modification to meet specific requirements while ensuring the accurate maintenance of the valid packet count to prevent inconsistencies in packet tracking. Fixes: 6d198a89c004 ("selftests/xsk: Add a test for shared umem feature") Reported-by: Maciej Fijalkowski Signed-off-by: Tushar Vyavahare Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20231214130007.33281-1-tushar.vyavahare@intel.com --- tools/testing/selftests/bpf/xskxceiver.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index b604c570309a..b1102ee13faa 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -634,16 +634,24 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } +static bool set_pkt_valid(int offset, u32 len) +{ + return len <= MAX_ETH_JUMBO_SIZE; +} + static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) { - pkt->valid = false; - } else { - pkt->valid = true; - pkt_stream->nb_valid_entries++; - } + pkt->valid = set_pkt_valid(offset, len); +} + +static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + bool prev_pkt_valid = pkt->valid; + + pkt_set(pkt_stream, pkt, offset, len); + pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -665,7 +673,7 @@ static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); pkt->pkt_nb = nb_start + i * nb_off; } @@ -700,10 +708,9 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) - pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); ifobj->xsk->pkt_stream = pkt_stream; - pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) -- cgit v1.2.3 From 5bd3cf8cbc8a286308ef3f40656659d5abc89995 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 13 Dec 2023 17:11:03 +0100 Subject: add selftest for statmount/listmount Initial selftest for the new statmount() and listmount() syscalls. Signed-off-by: Miklos Szeredi Link: https://lore.kernel.org/r/20231213161104.403171-1-mszeredi@redhat.com Signed-off-by: Christian Brauner --- tools/testing/selftests/Makefile | 1 + .../selftests/filesystems/statmount/.gitignore | 2 + .../selftests/filesystems/statmount/Makefile | 6 + .../filesystems/statmount/statmount_test.c | 612 +++++++++++++++++++++ 4 files changed, 621 insertions(+) create mode 100644 tools/testing/selftests/filesystems/statmount/.gitignore create mode 100644 tools/testing/selftests/filesystems/statmount/Makefile create mode 100644 tools/testing/selftests/filesystems/statmount/statmount_test.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..da2e1b0e4dd8 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -26,6 +26,7 @@ TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll TARGETS += filesystems/fat +TARGETS += filesystems/statmount TARGETS += firmware TARGETS += fpu TARGETS += ftrace diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore new file mode 100644 index 000000000000..82a4846cbc4b --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*_test diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile new file mode 100644 index 000000000000..07a0d5b545ca --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +TEST_GEN_PROGS := statmount_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c new file mode 100644 index 000000000000..3eafd7da58e2 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +static const char *const known_fs[] = { + "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs", + "autofs", "bcachefs", "bdev", "befs", "bfs", "binder", "binfmt_misc", + "bpf", "btrfs", "btrfs_test_fs", "ceph", "cgroup", "cgroup2", "cifs", + "coda", "configfs", "cpuset", "cramfs", "cxl", "dax", "debugfs", + "devpts", "devtmpfs", "dmabuf", "drm", "ecryptfs", "efivarfs", "efs", + "erofs", "exfat", "ext2", "ext3", "ext4", "f2fs", "functionfs", + "fuse", "fuseblk", "fusectl", "gadgetfs", "gfs2", "gfs2meta", "hfs", + "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem", + "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", + "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", + "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", + "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", + "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", + "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", + "zonefs", NULL }; + +static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, + size_t bufsize, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) +{ + size_t bufsize = 1 << 15; + struct statmount *buf = NULL, *tmp = alloca(bufsize); + int tofree = 0; + int ret; + + for (;;) { + ret = statmount(mnt_id, mask, tmp, bufsize, flags); + if (ret != -1) + break; + if (tofree) + free(tmp); + if (errno != EOVERFLOW) + return NULL; + bufsize <<= 1; + tofree = 1; + tmp = malloc(bufsize); + if (!tmp) + return NULL; + } + buf = malloc(tmp->size); + if (buf) + memcpy(buf, tmp, tmp->size); + if (tofree) + free(tmp); + + return buf; +} + +static void write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) + ksft_exit_fail_msg("opening %s for write: %s\n", path, strerror(errno)); + + ret = write(fd, val, len); + if (ret == -1) + ksft_exit_fail_msg("writing to %s: %s\n", path, strerror(errno)); + if (ret != len) + ksft_exit_fail_msg("short write to %s\n", path); + + ret = close(fd); + if (ret == -1) + ksft_exit_fail_msg("closing %s\n", path); +} + +static uint64_t get_mnt_id(const char *name, const char *path, uint64_t mask) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, mask, &sx); + if (ret == -1) + ksft_exit_fail_msg("retrieving %s mount ID for %s: %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name, strerror(errno)); + if (!(sx.stx_mask & mask)) + ksft_exit_fail_msg("no %s mount ID available for %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name); + + return sx.stx_mnt_id; +} + + +static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; +static int orig_root; +static uint64_t root_id, parent_id; +static uint32_t old_root_id, old_parent_id; + + +static void cleanup_namespace(void) +{ + fchdir(orig_root); + chroot("."); + umount2(root_mntpoint, MNT_DETACH); + rmdir(root_mntpoint); +} + +static void setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + write_file("/proc/self/uid_map", buf); + write_file("/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file("/proc/self/gid_map", buf); + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) + ksft_exit_fail_msg("making mount tree private: %s\n", + strerror(errno)); + + if (!mkdtemp(root_mntpoint)) + ksft_exit_fail_msg("creating temporary directory %s: %s\n", + root_mntpoint, strerror(errno)); + + old_parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID); + parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID_UNIQUE); + + orig_root = open("/", O_PATH); + if (orig_root == -1) + ksft_exit_fail_msg("opening root directory: %s", + strerror(errno)); + + atexit(cleanup_namespace); + + ret = mount(root_mntpoint, root_mntpoint, NULL, MS_BIND, NULL); + if (ret == -1) + ksft_exit_fail_msg("mounting temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret = chroot(root_mntpoint); + if (ret == -1) + ksft_exit_fail_msg("chroot to temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret = chdir("/"); + if (ret == -1) + ksft_exit_fail_msg("chdir to root: %s\n", strerror(errno)); + + old_root_id = get_mnt_id("root", "/", STATX_MNT_ID); + root_id = get_mnt_id("root", "/", STATX_MNT_ID_UNIQUE); +} + +static int setup_mount_tree(int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_REC|MS_SHARED, NULL); + if (ret == -1) { + ksft_test_result_fail("making mount tree shared: %s\n", + strerror(errno)); + return -1; + } + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + if (ret == -1) { + ksft_test_result_fail("mounting submount %s: %s\n", + root_mntpoint, strerror(errno)); + return -1; + } + } + return 0; +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, + uint64_t list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void test_listmount_empty_root(void) +{ + ssize_t res; + const unsigned int size = 32; + uint64_t list[size]; + + res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (res != 1) { + ksft_test_result_fail("listmount result is %zi != 1\n", res); + return; + } + + if (list[0] != root_id) { + ksft_test_result_fail("listmount ID doesn't match 0x%llx != 0x%llx\n", + (unsigned long long) list[0], + (unsigned long long) root_id); + return; + } + + ksft_test_result_pass("listmount empty root\n"); +} + +static void test_statmount_zero_mask(void) +{ + struct statmount sm; + int ret; + + ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount zero mask: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != 0) { + ksft_test_result_fail("unexpected mask: 0x%llx != 0x0\n", + (unsigned long long) sm.mask); + return; + } + + ksft_test_result_pass("statmount zero mask\n"); +} + +static void test_statmount_mnt_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_MNT_BASIC; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount mnt basic: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount mnt basic unavailable\n"); + return; + } + + if (sm.mnt_id != root_id) { + ksft_test_result_fail("unexpected root ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_id, + (unsigned long long) root_id); + return; + } + + if (sm.mnt_id_old != old_root_id) { + ksft_test_result_fail("unexpected old root ID: %u != %u\n", + sm.mnt_id_old, old_root_id); + return; + } + + if (sm.mnt_parent_id != parent_id) { + ksft_test_result_fail("unexpected parent ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_parent_id, + (unsigned long long) parent_id); + return; + } + + if (sm.mnt_parent_id_old != old_parent_id) { + ksft_test_result_fail("unexpected old parent ID: %u != %u\n", + sm.mnt_parent_id_old, old_parent_id); + return; + } + + if (sm.mnt_propagation != MS_PRIVATE) { + ksft_test_result_fail("unexpected propagation: 0x%llx\n", + (unsigned long long) sm.mnt_propagation); + return; + } + + ksft_test_result_pass("statmount mnt basic\n"); +} + + +static void test_statmount_sb_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_SB_BASIC; + struct statx sx; + struct statfs sf; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount sb basic: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount sb basic unavailable\n"); + return; + } + + ret = statx(AT_FDCWD, "/", 0, 0, &sx); + if (ret == -1) { + ksft_test_result_fail("stat root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_dev_major != sx.stx_dev_major || + sm.sb_dev_minor != sx.stx_dev_minor) { + ksft_test_result_fail("unexpected sb dev %u:%u != %u:%u\n", + sm.sb_dev_major, sm.sb_dev_minor, + sx.stx_dev_major, sx.stx_dev_minor); + return; + } + + ret = statfs("/", &sf); + if (ret == -1) { + ksft_test_result_fail("statfs root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_magic != sf.f_type) { + ksft_test_result_fail("unexpected sb magic: 0x%llx != 0x%lx\n", + (unsigned long long) sm.sb_magic, + sf.f_type); + return; + } + + ksft_test_result_pass("statmount sb basic\n"); +} + +static void test_statmount_mnt_point(void) +{ + struct statmount *sm; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount point: %s\n", + strerror(errno)); + return; + } + + if (strcmp(sm->str + sm->mnt_point, "/") != 0) { + ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", + sm->str + sm->mnt_point); + goto out; + } + ksft_test_result_pass("statmount mount point\n"); +out: + free(sm); +} + +static void test_statmount_mnt_root(void) +{ + struct statmount *sm; + const char *mnt_root, *last_dir, *last_root; + + last_dir = strrchr(root_mntpoint, '/'); + assert(last_dir); + last_dir++; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount root: %s\n", + strerror(errno)); + return; + } + mnt_root = sm->str + sm->mnt_root; + last_root = strrchr(mnt_root, '/'); + if (last_root) + last_root++; + else + last_root = mnt_root; + + if (strcmp(last_dir, last_root) != 0) { + ksft_test_result_fail("unexpected mount root last component: '%s' != '%s'\n", + last_root, last_dir); + goto out; + } + ksft_test_result_pass("statmount mount root\n"); +out: + free(sm); +} + +static void test_statmount_fs_type(void) +{ + struct statmount *sm; + const char *fs_type; + const char *const *s; + + sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0); + if (!sm) { + ksft_test_result_fail("statmount fs type: %s\n", + strerror(errno)); + return; + } + fs_type = sm->str + sm->fs_type; + for (s = known_fs; s != NULL; s++) { + if (strcmp(fs_type, *s) == 0) + break; + } + if (!s) + ksft_print_msg("unknown filesystem type: %s\n", fs_type); + + ksft_test_result_pass("statmount fs type\n"); + free(sm); +} + +static void test_statmount_string(uint64_t mask, size_t off, const char *name) +{ + struct statmount *sm; + size_t len, shortsize, exactsize; + uint32_t start, i; + int ret; + + sm = statmount_alloc(root_id, mask, 0); + if (!sm) { + ksft_test_result_fail("statmount %s: %s\n", name, + strerror(errno)); + goto out; + } + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto out; + } + if (sm->mask != mask) { + ksft_test_result_skip("statmount %s unavailable\n", name); + goto out; + } + len = sm->size - sizeof(*sm); + start = ((uint32_t *) sm)[off]; + + for (i = start;; i++) { + if (i >= len) { + ksft_test_result_fail("string out of bounds\n"); + goto out; + } + if (!sm->str[i]) + break; + } + exactsize = sm->size; + shortsize = sizeof(*sm) + i; + + ret = statmount(root_id, mask, sm, exactsize, 0); + if (ret == -1) { + ksft_test_result_fail("statmount exact size: %s\n", + strerror(errno)); + goto out; + } + errno = 0; + ret = statmount(root_id, mask, sm, shortsize, 0); + if (ret != -1 || errno != EOVERFLOW) { + ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", + strerror(errno)); + goto out; + } + + ksft_test_result_pass("statmount string %s\n", name); +out: + free(sm); +} + +static void test_listmount_tree(void) +{ + ssize_t res; + const unsigned int log2_num = 4; + const unsigned int step = 3; + const unsigned int size = (1 << log2_num) + step + 1; + size_t num, expect = 1 << log2_num; + uint64_t list[size]; + uint64_t list2[size]; + size_t i; + + + res = setup_mount_tree(log2_num); + if (res == -1) + return; + + num = res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (num != expect) { + ksft_test_result_fail("listmount result is %zi != %zi\n", + res, expect); + return; + } + + for (i = 0; i < size - step;) { + res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + if (res == -1) + ksft_test_result_fail("short listmount: %s\n", + strerror(errno)); + i += res; + if (res < step) + break; + } + if (i != num) { + ksft_test_result_fail("different number of entries: %zu != %zu\n", + i, num); + return; + } + for (i = 0; i < num; i++) { + if (list2[i] != list[i]) { + ksft_test_result_fail("different value for entry %zu: 0x%llx != 0x%llx\n", + i, + (unsigned long long) list2[i], + (unsigned long long) list[i]); + } + } + + ksft_test_result_pass("listmount tree\n"); +} + +#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t)) + +int main(void) +{ + int ret; + uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + + ksft_print_header(); + + ret = statmount(0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + setup_namespace(); + + ksft_set_plan(14); + test_listmount_empty_root(); + test_statmount_zero_mask(); + test_statmount_mnt_basic(); + test_statmount_sb_basic(); + test_statmount_mnt_root(); + test_statmount_mnt_point(); + test_statmount_fs_type(); + test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); + test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); + test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); + test_statmount_string(all_mask, str_off(mnt_root), "mount root & all"); + test_statmount_string(all_mask, str_off(mnt_point), "mount point & all"); + test_statmount_string(all_mask, str_off(fs_type), "fs type & all"); + + test_listmount_tree(); + + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} -- cgit v1.2.3 From 6f33e6fa29d0366d6e5b3ea2930dbc0b648151fe Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Dec 2023 22:02:56 -0800 Subject: perf stat: Combine the -A/--no-aggr and --no-merge options The -A or --no-aggr option disables aggregation of core events: $ perf stat -A -e cycles,data_total -a true Performance counter stats for 'system wide': CPU0 1,287,665 cycles CPU1 1,831,681 cycles CPU2 27,345,998 cycles CPU3 1,964,799 cycles CPU4 236,174 cycles CPU5 3,302,825 cycles CPU6 9,201,446 cycles CPU7 1,403,043 cycles CPU0 110.90 MiB data_total 0.008961761 seconds time elapsed The --no-merge option disables the aggregation of uncore events: $ perf stat --no-merge -e cycles,data_total -a true Performance counter stats for 'system wide': 38,482,778 cycles 15.04 MiB data_total [uncore_imc_free_running_1] 15.00 MiB data_total [uncore_imc_free_running_0] 0.005915155 seconds time elapsed Having two options confuses users who generally don't appreciate the difference in PMUs. Keep all the options but make it so they all disable aggregation both of core and uncore events: $ perf stat -A -e cycles,data_total -a true Performance counter stats for 'system wide': CPU0 85,878 cycles CPU1 88,179 cycles CPU2 60,872 cycles CPU3 3,265,567 cycles CPU4 82,357 cycles CPU5 83,383 cycles CPU6 84,156 cycles CPU7 220,803 cycles CPU0 2.38 MiB data_total [uncore_imc_free_running_0] CPU0 2.38 MiB data_total [uncore_imc_free_running_1] 0.001397205 seconds time elapsed Update the relevant 'perf stat' man page information. Reviewed-by: Kan Liang Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Changbin Du Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kaige Ye Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231214060256.2094017-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 52 ++++++++++++++++++---------------- tools/perf/builtin-stat.c | 5 ++-- tools/perf/util/stat-display.c | 2 +- tools/perf/util/stat.c | 2 +- tools/perf/util/stat.h | 1 - 5 files changed, 33 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8f789fa1242e..5af2e432b54f 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -422,7 +422,34 @@ See perf list output for the possible metrics and metricgroups. -A:: --no-aggr:: -Do not aggregate counts across all monitored CPUs. +--no-merge:: +Do not aggregate/merge counts across monitored CPUs or PMUs. + +When multiple events are created from a single event specification, +stat will, by default, aggregate the event counts and show the result +in a single row. This option disables that behavior and shows the +individual events and counts. + +Multiple events are created from a single event specification when: + +1. PID monitoring isn't requested and the system has more than one + CPU. For example, a system with 8 SMT threads will have one event + opened on each thread and aggregation is performed across them. + +2. Prefix or glob wildcard matching is used for the PMU name. For + example, multiple memory controller PMUs may exist typically with a + suffix of _0, _1, etc. By default the event counts will all be + combined if the PMU is specified without the suffix such as + uncore_imc rather than uncore_imc_0. + +3. Aliases, which are listed immediately after the Kernel PMU events + by perf list, are used. + +--hybrid-merge:: +Merge core event counts from all core PMUs. In hybrid or big.LITTLE +systems by default each core PMU will report its count +separately. This option forces core PMU counts to be combined to give +a behavior closer to having a single CPU type in the system. --topdown:: Print top-down metrics supported by the CPU. This allows to determine @@ -475,29 +502,6 @@ highlight 'tma_frontend_bound'. This metric may be drilled into with Error out if the input is higher than the supported max level. ---no-merge:: -Do not merge results from same PMUs. - -When multiple events are created from a single event specification, -stat will, by default, aggregate the event counts and show the result -in a single row. This option disables that behavior and shows -the individual events and counts. - -Multiple events are created from a single event specification when: -1. Prefix or glob matching is used for the PMU name. -2. Aliases, which are listed immediately after the Kernel PMU events - by perf list, are used. - ---hybrid-merge:: -Merge the hybrid event counts from all PMUs. - -For hybrid events, by default, the stat aggregates and reports the event -counts per PMU. But sometimes, it's also useful to aggregate event counts -from all PMUs. This option enables that behavior and reports the counts -without PMUs. - -For non-hybrid events, it should be no effect. - --smi-cost:: Measure SMI cost if msr/aperf/ and msr/smi/ events are supported. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index bda020c0b9d5..5fe9abc6a524 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1204,8 +1204,9 @@ static struct option stat_options[] = { OPT_STRING('C', "cpu", &target.cpu_list, "cpu", "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, - "disable CPU count aggregation", AGGR_NONE), - OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), + "disable aggregation across CPUs or PMUs", AGGR_NONE), + OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode, + "disable aggregation the same as -A or -no-aggr", AGGR_NONE), OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge, "Merge identical named hybrid events"), OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index afe6db8e7bf4..8c61f8627ebc 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -898,7 +898,7 @@ static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - if (config->no_merge || hybrid_uniquify(counter, config)) + if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) uniquify_event_name(counter); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 012c4946b9c4..b0bcf92f0f9c 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -592,7 +592,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev { struct evsel *evsel; - if (config->no_merge) + if (config->aggr_mode == AGGR_NONE) return; evlist__for_each_entry(evlist, evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 325d0fad1842..4357ba114822 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -76,7 +76,6 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; - bool no_merge; bool hybrid_merge; bool walltime_run_table; bool all_kernel; -- cgit v1.2.3 From 1af478903fc48c1409a8dd6b698383b62387adf1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Dec 2023 23:05:44 -0800 Subject: perf genelf: Set ELF program header addresses properly The text section starts after the ELF headers so PHDR.p_vaddr and others should have the correct addresses. Fixes: babd04386b1df8c3 ("perf jit: Include program header in ELF files") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ingo Molnar Cc: Jiri Olsa Cc: Lieven Hey Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/genelf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fefc72066c4e..ac17a3cb59dc 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -293,9 +293,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; -- cgit v1.2.3 From c966d23a351a33f8a977fd7efbb6f467132f7383 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Dec 2023 23:05:45 -0800 Subject: perf unwind-libdw: Handle JIT-generated DSOs properly Usually DSOs are mapped from the beginning of the file, so the base address of the DSO can be calculated by map->start - map->pgoff. However, JIT DSOs which are generated by `perf inject -j`, are mapped only the code segment. This makes unwind-libdw code confusing and rejects processing unwinds in the JIT DSOs. It should use the map start address as base for them to fix the confusion. Fixes: 1fe627da30331024 ("perf unwind: Take pgoff into account when reporting elf to libdwfl") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libdw.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 8554db3fc0d7..6013335a8dae 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -46,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -58,13 +59,25 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls into + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. + */ + if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + base = map__start(al->map); + else + base = map__start(al->map) - map__pgoff(al->map); + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != map__start(al->map) - map__pgoff(al->map)) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) { @@ -72,14 +85,14 @@ static int __report_module(struct addr_location *al, u64 ip, __symbol__join_symfs(filename, sizeof(filename), dso->long_name); mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (!mod) { char filename[PATH_MAX]; if (dso__build_id_filename(dso, filename, sizeof(filename), false)) mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + base, false); } if (mod) { -- cgit v1.2.3 From 4fb54994b2360ab5029ee3a959161f6fe6bbb349 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Dec 2023 23:05:46 -0800 Subject: perf unwind-libunwind: Fix base address for .eh_frame The base address of a DSO mapping should start at the start of the file. Usually DSOs are mapped from the pgoff 0 so it doesn't matter when it uses the start of the map address. But generated DSOs for JIT codes doesn't start from the 0 so it should subtract the offset to calculate the .eh_frame table offsets correctly. Fixes: dc2cf4ca866f5715 ("perf unwind: Fix segbase for ld.lld linked objects") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ingo Molnar Cc: Jiri Olsa Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libunwind-local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index c0641882fd2f..5e5c3395a499 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -327,7 +327,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, maps__for_each_entry(thread__maps(ui->thread), map_node) { struct map *map = map_node->map; - u64 start = map__start(map); + u64 start = map__start(map) - map__pgoff(map); if (map__dso(map) == dso && start < base_addr) base_addr = start; -- cgit v1.2.3 From 5fa695e7da4975e8d21ce49f3718d6cf00ecb75e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 14 Dec 2023 06:46:11 -0800 Subject: perf top: Use evsel's cpus to replace user_requested_cpus perf top errors out on a hybrid machine $perf top Error: The cycles:P event is not supported. The perf top expects that the "cycles" is collected on all CPUs in the system. But for hybrid there is no single "cycles" event which can cover all CPUs. Perf has to split it into two cycles events, e.g., cpu_core/cycles/ and cpu_atom/cycles/. Each event has its own CPU mask. If a event is opened on the unsupported CPU. The open fails. That's the reason of the above error out. Perf should only open the cycles event on the corresponding CPU. The commit ef91871c960e ("perf evlist: Propagate user CPU maps intersecting core PMU maps") intersect the requested CPU map with the CPU map of the PMU. Use the evsel's cpus to replace user_requested_cpus. The evlist's threads are also propagated to the evsel's threads in __perf_evlist__propagate_maps(). For a system-wide event, perf appends a dummy event and assign it to the evsel's threads. For a per-thread event, the evlist's thread_map is assigned to the evsel's threads. The same as the other tools, e.g., perf record, using the evsel's threads when opening an event. Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Ian Rogers Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Hector Martin Cc: Marc Zyngier Cc: Mark Rutland Cc: Namhyung Kim Closes: https://lore.kernel.org/linux-perf-users/ZXNnDrGKXbEELMXV@kernel.org/ Link: https://lore.kernel.org/r/20231214144612.1092028-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ed83afeeced0..13e609c0c693 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1026,8 +1026,8 @@ static int perf_top__start_counters(struct perf_top *top) evlist__for_each_entry(evlist, counter) { try_again: - if (evsel__open(counter, top->evlist->core.user_requested_cpus, - top->evlist->core.threads) < 0) { + if (evsel__open(counter, counter->core.cpus, + counter->core.threads) < 0) { /* * Specially handle overwrite fall back. -- cgit v1.2.3 From a61f89bf76ef6f87ec48dd90dbc73a6cf9952edc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 14 Dec 2023 06:46:12 -0800 Subject: perf top: Uniform the event name for the hybrid machine It's hard to distinguish the default cycles events among hybrid PMUs. For example, $ perf top Available samples 385 cycles:P 903 cycles:P The other tool, e.g., perf record, uniforms the event name and adds the hybrid PMU name before opening the event. So the events can be easily distinguished. Apply the same methodology for the perf top as well. The evlist__uniquify_name() will be invoked by both record and top. Move it to util/evlist.c With the patch: $ perf top Available samples 148 cpu_atom/cycles:P/ 1K cpu_core/cycles:P/ Reviewed-by: Ian Rogers Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Hector Martin Cc: Marc Zyngier Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20231214144612.1092028-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 28 +--------------------------- tools/perf/builtin-top.c | 1 + tools/perf/util/evlist.c | 25 +++++++++++++++++++++++++ tools/perf/util/evlist.h | 1 + 4 files changed, 28 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8a1002683fda..a89013c44fd5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2237,32 +2237,6 @@ static void hit_auxtrace_snapshot_trigger(struct record *rec) } } -static void record__uniquify_name(struct record *rec) -{ - struct evsel *pos; - struct evlist *evlist = rec->evlist; - char *new_name; - int ret; - - if (perf_pmus__num_core_pmus() == 1) - return; - - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; - - if (strchr(pos->name, '/')) - continue; - - ret = asprintf(&new_name, "%s/%s/", - pos->pmu_name, pos->name); - if (ret) { - free(pos->name); - pos->name = new_name; - } - } -} - static int record__terminate_thread(struct record_thread *thread_data) { int err; @@ -2496,7 +2470,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (data->is_pipe && rec->evlist->core.nr_entries == 1) rec->opts.sample_id = true; - record__uniquify_name(rec); + evlist__uniquify_name(rec->evlist); /* Debug message used by test scripts */ pr_debug3("perf record opening and mmapping events\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 13e609c0c693..baf1ab083436 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1298,6 +1298,7 @@ static int __cmd_top(struct perf_top *top) } } + evlist__uniquify_name(top->evlist); ret = perf_top__start_counters(top); if (ret) return ret; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0ed3ce2aa8eb..6f0892803c22 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -2518,3 +2518,28 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis } perf_cpu_map__put(user_requested_cpus); } + +void evlist__uniquify_name(struct evlist *evlist) +{ + struct evsel *pos; + char *new_name; + int ret; + + if (perf_pmus__num_core_pmus() == 1) + return; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_hybrid(pos)) + continue; + + if (strchr(pos->name, '/')) + continue; + + ret = asprintf(&new_name, "%s/%s/", + pos->pmu_name, pos->name); + if (ret) { + free(pos->name); + pos->name = new_name; + } + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 98e7ddb2bd30..cb91dc9117a2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -442,5 +442,6 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); +void evlist__uniquify_name(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ -- cgit v1.2.3 From 56925f389e152dcb8d093435d43b78a310539c23 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 14 Dec 2023 12:38:20 -0800 Subject: selftests/bpf: Remove flaky test_btf_id test With previous patch, one of subtests in test_btf_id becomes flaky and may fail. The following is a failing example: Error: #26 btf Error: #26/174 btf/BTF ID Error: #26/174 btf/BTF ID btf_raw_create:PASS:check 0 nsec btf_raw_create:PASS:check 0 nsec test_btf_id:PASS:check 0 nsec ... test_btf_id:PASS:check 0 nsec test_btf_id:FAIL:check BTF lingersdo_test_get_info:FAIL:check failed: -1 The test tries to prove a btf_id not available after the map is closed. But btf_id is freed only after workqueue and a rcu grace period, compared to previous case just after a rcu grade period. Depending on system workload, workqueue could take quite some time to execute function bpf_map_free_deferred() which may cause the test failure. Instead of adding arbitrary delays, let us remove the logic to check btf_id availability after map is closed. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20231214203820.1469402-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8fb4a04fbbc0..816145bcb647 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4630,11 +4630,6 @@ static int test_btf_id(unsigned int test_num) /* The map holds the last ref to BTF and its btf_id */ close(map_fd); map_fd = -1; - btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] >= 0, "BTF lingers")) { - err = -1; - goto done; - } fprintf(stderr, "OK"); -- cgit v1.2.3 From 77a7a8220f0d87c44425c0a12e0a72b14962535b Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:03 -0700 Subject: bpf: selftests: test_tunnel: Setup fresh topology for each subtest This helps with determinism b/c individual setup/teardown prevents leaking state between different subtests. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/0fb59fa16fb58cca7def5239df606005a3e8dd0e.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index d149ab98798d..b57d48219d0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -535,23 +535,20 @@ done: #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ + config_device(); \ test_ ## name(__VA_ARGS__); \ + cleanup(); \ } \ }) static void *test_tunnel_run_tests(void *arg) { - cleanup(); - config_device(); - RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); - cleanup(); - return NULL; } -- cgit v1.2.3 From 02b4e126e6a5f5552da2ccec47a028984d2d9654 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:04 -0700 Subject: bpf: selftests: test_tunnel: Use vmlinux.h declarations vmlinux.h declarations are more ergnomic, especially when working with kfuncs. The uapi headers are often incomplete for kfunc definitions. This commit also switches bitfield accesses to use CO-RE helpers. Switching to vmlinux.h definitions makes the verifier very unhappy with raw bitfield accesses. The error is: ; md.u.md2.dir = direction; 33: (69) r1 = *(u16 *)(r2 +11) misaligned stack access off (0x0; 0x0)+-64+11 size 2 Fix by using CO-RE-aware bitfield reads and writes. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/884bde1d9a351d126a3923886b945ea6b1b0776b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/bpf_tracing_net.h | 1 + .../testing/selftests/bpf/progs/test_tunnel_kern.c | 76 ++++++---------------- 2 files changed, 22 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 0b793a102791..1bdc680b0e0e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -26,6 +26,7 @@ #define IPV6_AUTOFLOWLABEL 70 #define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 #define TC_ACT_SHOT 2 #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f66af753bbbb..b320fb7bb080 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,62 +6,26 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "vmlinux.h" +#include #include #include +#include "bpf_kfuncs.h" +#include "bpf_tracing_net.h" #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) -#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT 4789 +#define ETH_P_IP 0x0800 +#define PACKET_HOST 0 +#define TUNNEL_CSUM bpf_htons(0x01) +#define TUNNEL_KEY bpf_htons(0x04) /* Only IPv4 address assigned to veth1. * 172.16.1.200 */ #define ASSIGNED_ADDR_VETH1 0xac1001c8 -struct geneve_opt { - __be16 opt_class; - __u8 type; - __u8 length:5; - __u8 r3:1; - __u8 r2:1; - __u8 r1:1; - __u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - -struct vxlan_metadata { - __u32 gbp; -}; - -struct bpf_fou_encap { - __be16 sport; - __be16 dport; -}; - -enum bpf_fou_encap_type { - FOU_BPF_ENCAP_FOU, - FOU_BPF_ENCAP_GUE, -}; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, @@ -205,9 +169,9 @@ int erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 7; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -246,8 +210,9 @@ int erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -284,9 +249,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 17; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -326,8 +291,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif -- cgit v1.2.3 From e7adc8291a9e9c232d600d82465cbbb682164ca3 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:05 -0700 Subject: bpf: selftests: Move xfrm tunnel test to test_progs test_progs is better than a shell script b/c C is a bit easier to maintain than shell. Also it's easier to use new infra like memory mapped global variables from C via bpf skeleton. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/a350db9e08520c64544562d88ec005a039124d9b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 143 +++++++++++++++++++++ .../testing/selftests/bpf/progs/test_tunnel_kern.c | 11 +- tools/testing/selftests/bpf/test_tunnel.sh | 92 ------------- 3 files changed, 151 insertions(+), 95 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index b57d48219d0b..2d7f8fa82ebd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -50,6 +50,7 @@ */ #include +#include #include #include #include @@ -92,6 +93,11 @@ #define IPIP_TUNL_DEV0 "ipip00" #define IPIP_TUNL_DEV1 "ipip11" +#define XFRM_AUTH "0x1111111111111111111111111111111111111111" +#define XFRM_ENC "0x22222222222222222222222222222222" +#define XFRM_SPI_IN_TO_OUT 0x1 +#define XFRM_SPI_OUT_TO_IN 0x2 + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void) SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); } +static int add_xfrm_tunnel(void) +{ + /* at_ns0 namespace + * at_ns0 -> root + */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32", + IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); + + /* root namespace + * at_ns0 -> root + */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip route add %s dev veth1 via %s src %s", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_xfrm_tunnel(void) +{ + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -532,6 +624,56 @@ done: test_tunnel_kern__destroy(skel); } +static void test_xfrm_tunnel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int tc_prog_fd; + int ifindex; + int err; + + err = add_xfrm_tunnel(); + if (!ASSERT_OK(err, "add_xfrm_tunnel")) + return; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + + /* attach tc prog to tunnel dev */ + tc_hook.ifindex = ifindex; + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) + goto done; + +done: + delete_xfrm_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ @@ -548,6 +690,7 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); + RUN_TEST(xfrm_tunnel); return NULL; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index b320fb7bb080..3a59eb9c34de 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -929,6 +929,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_reqid = 0; +volatile int xfrm_spi = 0; +volatile int xfrm_remote_ip = 0; + SEC("tc") int xfrm_get_state(struct __sk_buff *skb) { @@ -939,9 +943,10 @@ int xfrm_get_state(struct __sk_buff *skb) if (ret < 0) return TC_ACT_OK; - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", - x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + xfrm_reqid = x.reqid; + xfrm_spi = bpf_ntohl(x.spi); + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); + return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29a2..d9661b9988ba 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -517,90 +517,6 @@ test_ip6ip6() echo -e ${GREEN}"PASS: ip6$TYPE"${NC} } -setup_xfrm_tunnel() -{ - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # at_ns0 namespace - # at_ns0 -> root - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # root namespace - # at_ns0 -> root - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 -} - -test_xfrm_tunnel() -{ - if [[ -e /sys/kernel/tracing/trace ]]; then - TRACE=/sys/kernel/tracing/trace - else - TRACE=/sys/kernel/debug/tracing/trace - fi - config_device - > ${TRACE} - setup_xfrm_tunnel - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - sleep 1 - grep "reqid 1" ${TRACE} - check_err $? - grep "spi 0x1" ${TRACE} - check_err $? - grep "remote ip 0xac100164" ${TRACE} - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} -} - attach_bpf() { DEV=$1 @@ -630,10 +546,6 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() @@ -716,10 +628,6 @@ bpf_tunnel_test() test_ip6ip6 errors=$(( $errors + $? )) - echo "Testing IPSec tunnel..." - test_xfrm_tunnel - errors=$(( $errors + $? )) - return $errors } -- cgit v1.2.3 From 2cd07b0eb08c0ed63b1bd0bf0114146b19a4ab1f Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:06 -0700 Subject: bpf: xfrm: Add selftest for bpf_xdp_get_xfrm_state() This commit extends test_tunnel selftest to test the new XDP xfrm state lookup kfunc. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e704e9a4332e3eac7b458e4bfdec8fcc6984cdb6.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 16 ++++++- .../testing/selftests/bpf/progs/test_tunnel_kern.c | 51 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 2d7f8fa82ebd..2b3c6dd66259 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -278,7 +278,7 @@ static int add_xfrm_tunnel(void) SYS(fail, "ip netns exec at_ns0 " "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -313,7 +313,7 @@ static int add_xfrm_tunnel(void) */ SYS(fail, "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -628,8 +628,10 @@ static void test_xfrm_tunnel(void) { DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; struct nstoken *nstoken; + int xdp_prog_fd; int tc_prog_fd; int ifindex; int err; @@ -654,6 +656,14 @@ static void test_xfrm_tunnel(void) if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) goto done; + /* attach xdp prog to tunnel dev */ + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) + goto done; + err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); + if (!ASSERT_OK(err, "bpf_xdp_attach")) + goto done; + /* ping from at_ns0 namespace test */ nstoken = open_netns("at_ns0"); err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); @@ -667,6 +677,8 @@ static void test_xfrm_tunnel(void) goto done; if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) goto done; + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) + goto done; done: delete_xfrm_tunnel(); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 3a59eb9c34de..3e436e6f7312 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -30,6 +30,10 @@ int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap) __ksym; +struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, + u32 opts__sz) __ksym; +void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -950,4 +954,51 @@ int xfrm_get_state(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_replay_window = 0; + +SEC("xdp") +int xfrm_get_state_xdp(struct xdp_md *xdp) +{ + struct bpf_xfrm_state_opts opts = {}; + struct xfrm_state *x = NULL; + struct ip_esp_hdr *esph; + struct bpf_dynptr ptr; + u8 esph_buf[8] = {}; + u8 iph_buf[20] = {}; + struct iphdr *iph; + u32 off; + + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) + goto out; + + off = sizeof(struct ethhdr); + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); + if (!iph || iph->protocol != IPPROTO_ESP) + goto out; + + off += sizeof(struct iphdr); + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); + if (!esph) + goto out; + + opts.netns_id = BPF_F_CURRENT_NETNS; + opts.daddr.a4 = iph->daddr; + opts.spi = esph->spi; + opts.proto = IPPROTO_ESP; + opts.family = AF_INET; + + x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts)); + if (!x) + goto out; + + if (!x->replay_esn) + goto out; + + xfrm_replay_window = x->replay_esn->replay_window; +out: + if (x) + bpf_xdp_xfrm_state_release(x); + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f2d0ffee1f03395d9ae65f9c615b6a0ee05d0e12 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 14:50:16 -0800 Subject: selftests/bpf: utilize string values for delegate_xxx mount options Use both hex-based and string-based way to specify delegate mount options for BPF FS. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231214225016.1209867-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 52 ++++++++++++++++---------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 548aeb91ab0d..b5dce630e0e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -66,14 +66,22 @@ static int restore_priv_caps(__u64 old_caps) return cap_enable_effective(old_caps, NULL); } -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) { char buf[32]; int err; - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + if (!mask_str) { + if (mask == ~0ULL) { + mask_str = "any"; + } else { + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + mask_str = buf; + } + } + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask == ~0ULL ? "any" : buf, 0); + mask_str, 0); if (err < 0) err = -errno; return err; @@ -86,6 +94,10 @@ struct bpffs_opts { __u64 maps; __u64 progs; __u64 attachs; + const char *cmds_str; + const char *maps_str; + const char *progs_str; + const char *attachs_str; }; static int create_bpffs_fd(void) @@ -104,16 +116,16 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) int mnt_fd, err; /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); if (!ASSERT_OK(err, "fs_cfg_cmds")) return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); if (!ASSERT_OK(err, "fs_cfg_maps")) return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); if (!ASSERT_OK(err, "fs_cfg_progs")) return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); if (!ASSERT_OK(err, "fs_cfg_attachs")) return err; @@ -295,13 +307,13 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); /* pass BPF FS context object to parent */ @@ -325,22 +337,22 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { err = -EINVAL; goto cleanup; @@ -933,8 +945,8 @@ void test_token(void) { if (test__start_subtest("map_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_MAP_CREATE, - .maps = 1ULL << BPF_MAP_TYPE_STACK, + .cmds_str = "map_create", + .maps_str = "stack", }; subtest_userns(&opts, userns_map_create); @@ -948,9 +960,9 @@ void test_token(void) } if (test__start_subtest("prog_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_PROG_LOAD, - .progs = 1ULL << BPF_PROG_TYPE_XDP, - .attachs = 1ULL << BPF_XDP, + .cmds_str = "PROG_LOAD", + .progs_str = "XDP", + .attachs_str = "xdp", }; subtest_userns(&opts, userns_prog_load); -- cgit v1.2.3 From 4dc27587dcbaf9f6229222ba015344c0edce24c9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:25 -0800 Subject: tools: ynl-gen: add missing request free helpers for dumps The code gen generates a prototype for dump request free in the header, but no implementation in the source. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 266bc1629e58..9484882dbc2e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2771,6 +2771,7 @@ def main(): ri = RenderInfo(cw, parsed, args.mode, op, "dump") if not ri.type_consistent: parse_rsp_msg(ri, deref=True) + print_req_free(ri) print_dump_type_free(ri) print_dump(ri) cw.nl() -- cgit v1.2.3 From 139c163b5b0b7095bf88415da030795c403baa33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:26 -0800 Subject: tools: ynl-gen: use enum user type for members and args Commit 30c902001534 ("tools: ynl-gen: use enum name from the spec") added pre-cooked user type for enums. Use it to fix ignoring enum-name provided in the spec. This changes a type in struct ethtool_tunnel_udp_entry but is generally inconsequential for current families. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 9484882dbc2e..ab009d0f9db5 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -333,9 +333,8 @@ class TypeScalar(Type): else: self.is_bitfield = False - maybe_enum = not self.is_bitfield and 'enum' in self.attr - if maybe_enum and self.family.consts[self.attr['enum']].enum_name: - self.type_name = c_lower(f"enum {self.family.name}_{self.attr['enum']}") + if not self.is_bitfield and 'enum' in self.attr: + self.type_name = self.family.consts[self.attr['enum']].user_type elif self.is_auto_scalar: self.type_name = '__' + self.type[0] + '64' else: -- cgit v1.2.3 From f6805072c2aa81e6441c797bd074f4ae2db0c66e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:27 -0800 Subject: tools: ynl-gen: support fixed headers in genetlink Support genetlink families using simple fixed headers. Assume fixed header is identical for all ops of the family for now. Fixed headers are added to the request and reply structs as a _hdr member, and copied to/from netlink messages appropriately. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 8 ++++---- tools/net/ynl/lib/ynl.h | 1 + tools/net/ynl/ynl-gen-c.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 587286de10b5..c82a7f41b31c 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -191,12 +191,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, str ? " (" : ""); start = mnl_nlmsg_get_payload_offset(ys->nlh, - sizeof(struct genlmsghdr)); + ys->family->hdr_len); end = mnl_nlmsg_get_payload_tail(ys->nlh); off = ys->err.attr_offs; off -= sizeof(struct nlmsghdr); - off -= sizeof(struct genlmsghdr); + off -= ys->family->hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &bad_attr[n], sizeof(bad_attr) - n, NULL); @@ -217,14 +217,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, bad_attr[0] ? ", " : (str ? " (" : "")); start = mnl_nlmsg_get_payload_offset(ys->nlh, - sizeof(struct genlmsghdr)); + ys->family->hdr_len); end = mnl_nlmsg_get_payload_tail(ys->nlh); nest_pol = ys->req_policy; if (tb[NLMSGERR_ATTR_MISS_NEST]) { off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); off -= sizeof(struct nlmsghdr); - off -= sizeof(struct genlmsghdr); + off -= ys->family->hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &miss_attr[n], sizeof(miss_attr) - n, diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 5de580b992b8..ce77a6d76ce0 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -44,6 +44,7 @@ struct ynl_error { struct ynl_family { /* private: */ const char *name; + size_t hdr_len; const struct ynl_ntf_info *ntf_info; unsigned int ntf_info_size; }; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index ab009d0f9db5..70e2c41e5bd6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1124,6 +1124,10 @@ class RenderInfo: self.op_mode = op_mode self.op = op + self.fixed_hdr = None + if op and op.fixed_header: + self.fixed_hdr = 'struct ' + c_lower(op.fixed_header) + # 'do' and 'dump' response parsing is identical self.type_consistent = True if op_mode != 'do' and 'dump' in op: @@ -1570,7 +1574,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): if struct.nested: iter_line = "mnl_attr_for_each_nested(attr, nested)" else: - iter_line = "mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr))" + if ri.fixed_hdr: + local_vars += ['void *hdr;'] + iter_line = "mnl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" array_nests = set() multi_attrs = set() @@ -1603,6 +1609,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): for arg in struct.inherited: ri.cw.p(f'dst->{arg} = {arg};') + if ri.fixed_hdr: + ri.cw.p('hdr = mnl_nlmsg_get_payload_offset(nlh, sizeof(struct genlmsghdr));') + ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));") for anest in sorted(all_multi): aspec = struct[anest] ri.cw.p(f"if (dst->{aspec.c_name})") @@ -1723,6 +1732,10 @@ def print_req(ri): ret_err = 'NULL' local_vars += [f'{type_name(ri, rdir(direction))} *rsp;'] + if ri.fixed_hdr: + local_vars += ['size_t hdr_len;', + 'void *hdr;'] + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) @@ -1733,6 +1746,13 @@ def print_req(ri): if 'reply' in ri.op[ri.op_mode]: ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() + + if ri.fixed_hdr: + ri.cw.p("hdr_len = sizeof(req->_hdr);") + ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") + ri.cw.nl() + for _, attr in ri.struct["request"].member_list(): attr.attr_put(ri, "req") ri.cw.nl() @@ -1773,9 +1793,11 @@ def print_dump(ri): 'struct nlmsghdr *nlh;', 'int err;'] - for var in local_vars: - ri.cw.p(f'{var}') - ri.cw.nl() + if ri.fixed_hdr: + local_vars += ['size_t hdr_len;', + 'void *hdr;'] + + ri.cw.write_func_lvar(local_vars) ri.cw.p('yds.ys = ys;') ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});") @@ -1788,6 +1810,12 @@ def print_dump(ri): ri.cw.nl() ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + if ri.fixed_hdr: + ri.cw.p("hdr_len = sizeof(req->_hdr);") + ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") + ri.cw.nl() + if "request" in ri.op[ri.op_mode]: ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") ri.cw.nl() @@ -1845,6 +1873,10 @@ def _print_type(ri, direction, struct): ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}") + if ri.fixed_hdr: + ri.cw.p(ri.fixed_hdr + ' _hdr;') + ri.cw.nl() + meta_started = False for _, attr in struct.member_list(): for type_filter in ['len', 'bit']: @@ -2482,6 +2514,10 @@ def render_user_family(family, cw, prototype): cw.block_start(f'{symbol} = ') cw.p(f'.name\t\t= "{family.c_name}",') + if family.fixed_header: + cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),') + else: + cw.p('.hdr_len\t= sizeof(struct genlmsghdr),') if family.ntfs: cw.p(f".ntf_info\t= {family['name']}_ntf_info,") cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") -- cgit v1.2.3 From f967a498fce89e9291f01b8f29109fe782d7670a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:28 -0800 Subject: tools: ynl-gen: fill in implementations for TypeUnused Fill in more empty handlers for TypeUnused. When 'unused' attr gets specified in a nested set we have to cleanly skip it during code generation. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 70e2c41e5bd6..2b7961838fb6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -264,6 +264,15 @@ class TypeUnused(Type): def attr_policy(self, cw): pass + def attr_put(self, ri, var): + pass + + def attr_get(self, ri, var, first): + pass + + def setter(self, ri, space, direction, deref=False, ref=None): + pass + class TypePad(Type): def presence_type(self): -- cgit v1.2.3 From 38329fcfb757b8215c07a77b6657721cc7e9530e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:29 -0800 Subject: tools: ynl-gen: record information about recursive nests Track which nests are recursive. Non-recursive nesting gets rendered in C as directly nested structs. For recursive ones we need to put a pointer in, rather than full struct. Track this information, no change to generated code, yet. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2b7961838fb6..8a2c304cd2ad 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -105,6 +105,9 @@ class Type(SpecAttr): def is_scalar(self): return self.type in {'u8', 'u16', 'u32', 'u64', 's32', 's64'} + def is_recursive(self): + return False + def presence_type(self): return 'bit' @@ -529,6 +532,9 @@ class TypeBitfield32(Type): class TypeNest(Type): + def is_recursive(self): + return self.family.pure_nested_structs[self.nested_attrs].recursive + def _complex_member_type(self, ri): return self.nested_struct_type @@ -700,9 +706,12 @@ class Struct: if self.nested and space_name in family.consts: self.struct_name += '_' self.ptr_name = self.struct_name + ' *' + # All attr sets this one contains, directly or multiple levels down + self.child_nests = set() self.request = False self.reply = False + self.recursive = False self.attr_list = [] self.attrs = dict() @@ -1059,14 +1068,20 @@ class Family(SpecFamily): pns_key_seen.add(name) else: pns_key_list.append(name) - # Propagate the request / reply + # Propagate the request / reply / recursive for attr_set, struct in reversed(self.pure_nested_structs.items()): for _, spec in self.attr_sets[attr_set].items(): if 'nested-attributes' in spec: - child = self.pure_nested_structs.get(spec['nested-attributes']) + child_name = spec['nested-attributes'] + struct.child_nests.add(child_name) + child = self.pure_nested_structs.get(child_name) if child: + if not child.recursive: + struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if attr_set in struct.child_nests: + struct.recursive = True def _load_attr_use(self): for _, struct in self.pure_nested_structs.items(): -- cgit v1.2.3 From aa75783b95a1e7fc09129f5364476e6effe47392 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:30 -0800 Subject: tools: ynl-gen: re-sort ignoring recursive nests We try to keep the structures and helpers "topologically sorted", to avoid forward declarations. When recursive nests are at play we need to sort twice, because structs which end up being marked as recursive will get a full set of forward declarations, so we should ignore them for the purpose of sorting. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 52 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 8a2c304cd2ad..4ef3a774c402 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1008,6 +1008,33 @@ class Family(SpecFamily): self.root_sets[op['attribute-set']]['request'].update(req_attrs) self.root_sets[op['attribute-set']]['reply'].update(rsp_attrs) + def _sort_pure_types(self): + # Try to reorder according to dependencies + pns_key_list = list(self.pure_nested_structs.keys()) + pns_key_seen = set() + rounds = len(pns_key_list) ** 2 # it's basically bubble sort + for _ in range(rounds): + if len(pns_key_list) == 0: + break + name = pns_key_list.pop(0) + finished = True + for _, spec in self.attr_sets[name].items(): + if 'nested-attributes' in spec: + nested = spec['nested-attributes'] + # If the unknown nest we hit is recursive it's fine, it'll be a pointer + if self.pure_nested_structs[nested].recursive: + continue + if nested not in pns_key_seen: + # Dicts are sorted, this will make struct last + struct = self.pure_nested_structs.pop(name) + self.pure_nested_structs[name] = struct + finished = False + break + if finished: + pns_key_seen.add(name) + else: + pns_key_list.append(name) + def _load_nested_sets(self): attr_set_queue = list(self.root_sets.keys()) attr_set_seen = set(self.root_sets.keys()) @@ -1047,27 +1074,8 @@ class Family(SpecFamily): if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True - # Try to reorder according to dependencies - pns_key_list = list(self.pure_nested_structs.keys()) - pns_key_seen = set() - rounds = len(pns_key_list)**2 # it's basically bubble sort - for _ in range(rounds): - if len(pns_key_list) == 0: - break - name = pns_key_list.pop(0) - finished = True - for _, spec in self.attr_sets[name].items(): - if 'nested-attributes' in spec: - if spec['nested-attributes'] not in pns_key_seen: - # Dicts are sorted, this will make struct last - struct = self.pure_nested_structs.pop(name) - self.pure_nested_structs[name] = struct - finished = False - break - if finished: - pns_key_seen.add(name) - else: - pns_key_list.append(name) + self._sort_pure_types() + # Propagate the request / reply / recursive for attr_set, struct in reversed(self.pure_nested_structs.items()): for _, spec in self.attr_sets[attr_set].items(): @@ -1083,6 +1091,8 @@ class Family(SpecFamily): if attr_set in struct.child_nests: struct.recursive = True + self._sort_pure_types() + def _load_attr_use(self): for _, struct in self.pure_nested_structs.items(): if struct.request: -- cgit v1.2.3 From 461f25a2e4334767d3e306b08dbda054da1aaa30 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:31 -0800 Subject: tools: ynl-gen: store recursive nests by a pointer To avoid infinite nesting store recursive structs by pointer. If recursive struct is placed in the op directly - the first instance can be stored by value. That makes the code much less of a pain for majority of practical uses. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 4ef3a774c402..7176afb4a3bd 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -108,6 +108,9 @@ class Type(SpecAttr): def is_recursive(self): return False + def is_recursive_for_op(self, ri): + return self.is_recursive() and not ri.op + def presence_type(self): return 'bit' @@ -148,6 +151,8 @@ class Type(SpecAttr): member = self._complex_member_type(ri) if member: ptr = '*' if self.is_multi_val() else '' + if self.is_recursive_for_op(ri): + ptr = '*' ri.cw.p(f"{member} {ptr}{self.c_name};") return members = self.arg_member(ri) @@ -539,7 +544,11 @@ class TypeNest(Type): return self.nested_struct_type def free(self, ri, var, ref): - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name});') + at = '&' + if self.is_recursive_for_op(ri): + at = '' + ri.cw.p(f'if ({var}->{ref}{self.c_name})') + ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -548,8 +557,9 @@ class TypeNest(Type): return 'NLA_POLICY_NESTED(' + self.nested_render_name + '_nl_policy)' def attr_put(self, ri, var): + at = '' if self.is_recursive_for_op(ri) else '&' self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + - f"{self.enum_name}, &{var}->{self.c_name})") + f"{self.enum_name}, {at}{var}->{self.c_name})") def _attr_get(self, ri, var): get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))", @@ -562,6 +572,8 @@ class TypeNest(Type): ref = (ref if ref else []) + [self.c_name] for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list(): + if attr.is_recursive(): + continue attr.setter(ri, self.nested_attrs, direction, deref=deref, ref=ref) -- cgit v1.2.3 From 7b5fe80ebc6312896a72f0187b00f7840579d4fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:32 -0800 Subject: tools: ynl-gen: print prototypes for recursive stuff We avoid printing forward declarations and prototypes for most types by sorting things topologically. But if structs nest we do need the forward declarations, there's no other way. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 7176afb4a3bd..7fc1aa788f6f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1521,6 +1521,10 @@ def print_dump_prototype(ri): print_prototype(ri, "request") +def put_typol_fwd(cw, struct): + cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') + + def put_typol(cw, struct): type_max = struct.attr_set.max_name cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') @@ -1594,12 +1598,17 @@ def put_enum_to_str(family, cw, enum): _put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum) -def put_req_nested(ri, struct): +def put_req_nested_prototype(ri, struct, suffix=';'): func_args = ['struct nlmsghdr *nlh', 'unsigned int attr_type', f'{struct.ptr_name}obj'] - ri.cw.write_func_prot('int', f'{struct.render_name}_put', func_args) + ri.cw.write_func_prot('int', f'{struct.render_name}_put', func_args, + suffix=suffix) + + +def put_req_nested(ri, struct): + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() ri.cw.write_func_lvar('struct nlattr *nest;') @@ -1726,18 +1735,23 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.nl() -def parse_rsp_nested(ri, struct): +def parse_rsp_nested_prototype(ri, struct, suffix=';'): func_args = ['struct ynl_parse_arg *yarg', 'const struct nlattr *nested'] for arg in struct.inherited: func_args.append('__u32 ' + arg) + ri.cw.write_func_prot('int', f'{struct.render_name}_parse', func_args, + suffix=suffix) + + +def parse_rsp_nested(ri, struct): + parse_rsp_nested_prototype(ri, struct, suffix='') + local_vars = ['const struct nlattr *attr;', f'{struct.ptr_name}dst = yarg->data;'] init_lines = [] - ri.cw.write_func_prot('int', f'{struct.render_name}_parse', func_args) - _multi_parse(ri, struct, init_lines, local_vars) @@ -2051,6 +2065,10 @@ def _free_type(ri, direction, struct): ri.cw.nl() +def free_rsp_nested_prototype(ri): + print_free_prototype(ri, "") + + def free_rsp_nested(ri, struct): _free_type(ri, "", struct) @@ -2818,7 +2836,14 @@ def main(): put_enum_to_str(parsed, cw, const) cw.nl() + has_recursive_nests = False cw.p('/* Policies */') + for struct in parsed.pure_nested_structs.values(): + if struct.recursive: + put_typol_fwd(cw, struct) + has_recursive_nests = True + if has_recursive_nests: + cw.nl() for name in parsed.pure_nested_structs: struct = Struct(parsed, name) put_typol(cw, struct) @@ -2827,6 +2852,15 @@ def main(): put_typol(cw, struct) cw.p('/* Common nested types */') + if has_recursive_nests: + for attr_set, struct in parsed.pure_nested_structs.items(): + ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) + free_rsp_nested_prototype(ri) + if struct.request: + put_req_nested_prototype(ri, struct) + if struct.reply: + parse_rsp_nested_prototype(ri, struct) + cw.nl() for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) -- cgit v1.2.3 From b6925b4ed57cccf42ca0fb46c7446f0859e7ad4b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:44 +0800 Subject: selftests/net: add variable NS_LIST for lib.sh Add a global variable NS_LIST to store all the namespaces that setup_ns created, so the caller could call cleanup_all_ns() instead of remember all the netns names when using cleanup_ns(). Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 518eca57b815..dca549443801 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -6,6 +6,8 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +# namespace list created by setup_ns +NS_LIST="" ############################################################################## # Helpers @@ -56,6 +58,11 @@ cleanup_ns() return $ret } +cleanup_all_ns() +{ + cleanup_ns $NS_LIST +} + # setup netns with given names as prefix. e.g # setup_ns local remote setup_ns() @@ -82,4 +89,5 @@ setup_ns() ip -n "$ns" link set lo up ns_list="$ns_list $ns" done + NS_LIST="$NS_LIST $ns_list" } -- cgit v1.2.3 From 59cac2efd378811822ee320777116845b3e30722 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:45 +0800 Subject: selftests/net: convert srv6_end_dt46_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt46_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: IPv4 Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 34 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt46_l3vpn_test.sh | 51 ++++++++++------------ 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 441eededa031..02d617040793 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -193,8 +193,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -250,26 +249,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -279,16 +274,14 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -299,8 +292,8 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -332,10 +325,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 @@ -379,18 +370,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -409,8 +403,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -428,8 +423,9 @@ check_hs_ipv6_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } @@ -438,8 +434,9 @@ check_hs_ipv4_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3 From 7b2d941c81bc110925870b6f0c1a071a20850b7c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:46 +0800 Subject: selftests/net: convert srv6_end_dt4_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt4_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 18 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-4-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt4_l3vpn_test.sh | 48 ++++++++++------------ 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index f96282362811..79fb81e63c59 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -163,8 +163,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -219,27 +218,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} - - ip netns add ${nsname} + local id=$1 + eval local nsname=\${rt_${id}} ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -249,16 +243,13 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # set the networking for the host - ip netns add ${hsname} - # disable the rp_filter otherwise the kernel gets confused about how # to route decap ipv4 packets. ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 @@ -266,7 +257,7 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +284,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004 # set the encap route for encapsulating packets which arrive from the @@ -328,18 +317,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -358,8 +350,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -377,8 +370,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3 From 792cd1dbc8a253c67f715ae6f987b8b28dbbcbbf Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:47 +0800 Subject: selftests/net: convert srv6_end_dt6_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt6_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 18 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-5-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt6_l3vpn_test.sh | 46 ++++++++++------------ 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh index b9b06ef80d88..e408406d8489 100755 --- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh @@ -164,8 +164,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -220,26 +219,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -248,22 +243,20 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +286,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006 @@ -331,18 +322,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -361,8 +355,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -380,8 +375,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3 From 779283b7770f4580afa6691aa7fc6519d60f0e88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:48 +0800 Subject: selftests/net: convert fcnal-test.sh to run it in unique namespace Here is the test result after conversion. There are some failures, but it also exists on my system without this patch. So it's not affectec by this patch and I will check the reason later. ]# time ./fcnal-test.sh /usr/bin/which: no nettest in (/root/.local/bin:/root/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin) ########################################################################### IPv4 ping ########################################################################### ################################################################# No VRF SYSCTL: net.ipv4.raw_l3mdev_accept=0 TEST: ping out - ns-B IP [ OK ] TEST: ping out, device bind - ns-B IP [ OK ] TEST: ping out, address bind - ns-B IP [ OK ] ... ################################################################# SNAT on VRF TEST: IPv4 TCP connection over VRF with SNAT [ OK ] TEST: IPv6 TCP connection over VRF with SNAT [ OK ] Tests passed: 893 Tests failed: 21 real 52m48.178s user 0m34.158s sys 1m42.976s BTW, this test needs a really long time. So expand the timeout to 1h. Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-6-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 30 +++++++++++++----------------- tools/testing/selftests/net/settings | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index d32a14ba069a..0d4f252427e2 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -37,9 +37,7 @@ # # server / client nomenclature relative to ns-A -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh VERBOSE=0 NSA_DEV=eth1 @@ -82,14 +80,6 @@ MCAST=ff02::1 NSA_LINKIP6= NSB_LINKIP6= -NSA=ns-A -NSB=ns-B -NSC=ns-C - -NSA_CMD="ip netns exec ${NSA}" -NSB_CMD="ip netns exec ${NSB}" -NSC_CMD="ip netns exec ${NSC}" - which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) # Check if FIPS mode is enabled @@ -406,9 +396,6 @@ create_ns() local addr=$2 local addr6=$3 - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -467,13 +454,12 @@ cleanup() ip -netns ${NSA} link del dev ${NSA_DEV} ip netns pids ${NSA} | xargs kill 2>/dev/null - ip netns del ${NSA} + cleanup_ns ${NSA} fi ip netns pids ${NSB} | xargs kill 2>/dev/null - ip netns del ${NSB} ip netns pids ${NSC} | xargs kill 2>/dev/null - ip netns del ${NSC} >/dev/null 2>&1 + cleanup_ns ${NSB} ${NSC} } cleanup_vrf_dup() @@ -487,6 +473,8 @@ setup_vrf_dup() { # some VRF tests use ns-C which has the same config as # ns-B but for a device NOT in the VRF + setup_ns NSC + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSC} "-" "-" connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 @@ -503,6 +491,10 @@ setup() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128 create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128 connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \ @@ -545,6 +537,10 @@ setup_lla_only() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB NSC + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSA} "-" "-" create_ns ${NSB} "-" "-" create_ns ${NSC} "-" "-" diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index dfc27cdc6c05..ed8418e8217a 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=1500 +timeout=3600 -- cgit v1.2.3 From a33e9da3470499e9ff476138f271fb52d6bfe767 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:49 +0800 Subject: selftests/net: fix grep checking for fib_nexthop_multiprefix When running fib_nexthop_multiprefix test I saw all IPv6 test failed. e.g. ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [FAIL] With -v it shows COMMAND: ip netns exec h0 /usr/sbin/ping6 -s 1350 -c5 -w5 2001:db8:101::1 PING 2001:db8:101::1(2001:db8:101::1) 1350 data bytes From 2001:db8:100::64 icmp_seq=1 Packet too big: mtu=1300 --- 2001:db8:101::1 ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Route get 2001:db8:101::1 via 2001:db8:100::64 dev eth0 src 2001:db8:100::1 metric 1024 expires 599sec mtu 1300 pref medium Searching for: 2001:db8:101::1 from :: via 2001:db8:100::64 dev eth0 src 2001:db8:100::1 .* mtu 1300 The reason is when CONFIG_IPV6_SUBTREES is not enabled, rt6_fill_node() will not put RTA_SRC info. After fix: ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] Fixes: 735ab2f65dce ("selftests: Add test with multiple prefixes using single nexthop") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-7-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthop_multiprefix.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index 51df5e305855..b52d59547fc5 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -209,12 +209,12 @@ validate_v6_exception() echo "Route get" ip -netns h0 -6 ro get ${dst} echo "Searching for:" - echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" echo fi ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + grep -q "${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" -- cgit v1.2.3 From 5ae89fe43a4e889249fa700d0da5aa5d3e64e972 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:50 +0800 Subject: selftests/net: convert fib_nexthop_multiprefix to run it in unique namespace Here is the test result after conversion. ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv4: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv6: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv4: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv6: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv4: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv6: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv4: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv6: host 0 to host 3, mtu 1400 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-8-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/fib_nexthop_multiprefix.sh | 98 +++++++++++----------- 1 file changed, 48 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index b52d59547fc5..e85248609af4 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -12,6 +12,7 @@ # # routing in h0 to hN is done with nexthop objects. +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -72,12 +73,6 @@ create_ns() { local ns=${1} - ip netns del ${ns} 2>/dev/null - - ip netns add ${ns} - ip -netns ${ns} addr add 127.0.0.1/8 dev lo - ip -netns ${ns} link set lo up - ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 case ${ns} in h*) @@ -97,7 +92,13 @@ setup() #set -e - for ns in h0 r1 h1 h2 h3 + setup_ns h0 r1 h1 h2 h3 + h[0]=$h0 + h[1]=$h1 + h[2]=$h2 + h[3]=$h3 + r[1]=$r1 + for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]} do create_ns ${ns} done @@ -108,35 +109,35 @@ setup() for i in 0 1 2 3 do - ip -netns h${i} li add eth0 type veth peer name r1h${i} - ip -netns h${i} li set eth0 up - ip -netns h${i} li set r1h${i} netns r1 name eth${i} up - - ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24 - ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64 - ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24 - ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64 + ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i} + ip -netns ${h[$i]} li set eth0 up + ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up + + ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24 + ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64 + ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24 + ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64 done - ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0 - ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0 + ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0 + ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0 - # routing from h0 to h1-h3 and back + # routing from ${h[0]} to h1-h3 and back for i in 1 2 3 do - ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4 - ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254 + ip -netns ${h[0]} ro add 172.16.10${i}.0/24 nhid 4 + ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254 - ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6 - ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 + ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6 + ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 done if [ "$VERBOSE" = "1" ]; then echo echo "host 1 config" - ip -netns h0 li sh - ip -netns h0 ro sh - ip -netns h0 -6 ro sh + ip -netns ${h[0]} li sh + ip -netns ${h[0]} ro sh + ip -netns ${h[0]} -6 ro sh fi #set +e @@ -144,10 +145,7 @@ setup() cleanup() { - for n in h0 r1 h1 h2 h3 - do - ip netns del ${n} 2>/dev/null - done + cleanup_all_ns } change_mtu() @@ -156,7 +154,7 @@ change_mtu() local mtu=$2 run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu} - run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu} + run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu} } ################################################################################ @@ -168,23 +166,23 @@ validate_v4_exception() local mtu=$2 local ping_sz=$3 local dst="172.16.10${i}.1" - local h0=172.16.100.1 - local r1=172.16.100.254 + local h0_ip=172.16.100.1 + local r1_ip=172.16.100.254 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 ro get ${dst} + ip -netns ${h0} ro get ${dst} echo "Searching for:" echo " cache .* mtu ${mtu}" echo fi - ip -netns h0 ro get ${dst} | \ + ip -netns ${h0} ro get ${dst} | \ grep -q "cache .* mtu ${mtu}" rc=$? @@ -197,24 +195,24 @@ validate_v6_exception() local mtu=$2 local ping_sz=$3 local dst="2001:db8:10${i}::1" - local h0=2001:db8:100::1 - local r1=2001:db8:100::64 + local h0_ip=2001:db8:100::1 + local r1_ip=2001:db8:100::64 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 -6 ro get ${dst} + ip -netns ${h0} -6 ro get ${dst} echo "Searching for:" - echo " ${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" echo fi - ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + ip -netns ${h0} -6 ro get ${dst} | \ + grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" @@ -242,11 +240,11 @@ for i in 1 2 3 do # generate a cached route per-cpu for c in ${cpus}; do - run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 - [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1 + [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 - [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1 + [ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1 [ $ret -ne 0 ] && break done @@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then validate_v6_exception 3 1400 0 # targeted deletes to trigger cleanup paths in kernel - ip -netns h0 ro del 172.16.102.0/24 nhid 4 - ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6 + ip -netns ${h0} ro del 172.16.102.0/24 nhid 4 + ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6 - ip -netns h0 nexthop del id 4 - ip -netns h0 nexthop del id 6 + ip -netns ${h0} nexthop del id 4 + ip -netns ${h0} nexthop del id 6 fi cleanup -- cgit v1.2.3 From d2168ea792345938c4f53c22126066fbe7a6001c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:51 +0800 Subject: selftests/net: convert fib_nexthop_nongw.sh to run it in unique namespace Here is the test result after conversion. ]# ./fib_nexthop_nongw.sh TEST: nexthop: get route with nexthop without gw [ OK ] TEST: nexthop: ping through nexthop without gw [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-9-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthop_nongw.sh | 34 +++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh index b7b928b38ce4..1ccf56f10171 100755 --- a/tools/testing/selftests/net/fib_nexthop_nongw.sh +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -8,6 +8,7 @@ # veth0 <---|---> veth1 # Validate source address selection for route without gateway +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -64,35 +65,31 @@ run_cmd() # config setup() { - ip netns add h1 - ip -n h1 link set lo up - ip netns add h2 - ip -n h2 link set lo up + setup_ns h1 h2 # Add a fake eth0 to support an ip address - ip -n h1 link add name eth0 type dummy - ip -n h1 link set eth0 up - ip -n h1 address add 192.168.0.1/24 dev eth0 + ip -n $h1 link add name eth0 type dummy + ip -n $h1 link set eth0 up + ip -n $h1 address add 192.168.0.1/24 dev eth0 # Configure veths (same @mac, arp off) - ip -n h1 link add name veth0 type veth peer name veth1 netns h2 - ip -n h1 link set veth0 up + ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2 + ip -n $h1 link set veth0 up - ip -n h2 link set veth1 up + ip -n $h2 link set veth1 up # Configure @IP in the peer netns - ip -n h2 address add 192.168.1.1/32 dev veth1 - ip -n h2 route add default dev veth1 + ip -n $h2 address add 192.168.1.1/32 dev veth1 + ip -n $h2 route add default dev veth1 # Add a nexthop without @gw and use it in a route - ip -n h1 nexthop add id 1 dev veth0 - ip -n h1 route add 192.168.1.1 nhid 1 + ip -n $h1 nexthop add id 1 dev veth0 + ip -n $h1 route add 192.168.1.1 nhid 1 } cleanup() { - ip netns del h1 2>/dev/null - ip netns del h2 2>/dev/null + cleanup_ns $h1 $h2 } trap cleanup EXIT @@ -108,12 +105,11 @@ do esac done -cleanup setup -run_cmd ip -netns h1 route get 192.168.1.1 +run_cmd ip -netns $h1 route get 192.168.1.1 log_test $? 0 "nexthop: get route with nexthop without gw" -run_cmd ip netns exec h1 ping -c1 192.168.1.1 +run_cmd ip netns exec $h1 ping -c1 192.168.1.1 log_test $? 0 "nexthop: ping through nexthop without gw" exit $ret -- cgit v1.2.3 From 39333e31672cfc35430d1f5e411188553936b64d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:52 +0800 Subject: selftests/net: convert fib_nexthops.sh to run it in unique namespace Here is the test result after conversion. ]# ./fib_nexthops.sh Basic functional tests ---------------------- TEST: List with nothing defined [ OK ] TEST: Nexthop get on non-existent id [ OK ] ... TEST: IPv6 resilient nexthop group torture test [ OK ] Tests passed: 234 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-10-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 142 ++++++++++++++-------------- 1 file changed, 69 insertions(+), 73 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index a6f2c0b9555d..d5a281aadbac 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -14,6 +14,7 @@ # objects. Device reference counts and network namespace cleanup tested # by use of network namespace for peer. +source lib.sh ret=0 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -148,13 +149,7 @@ create_ns() { local n=${1} - ip netns del ${n} 2>/dev/null - set -e - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} addr add 127.0.0.1/8 dev lo - ip -netns ${n} link set lo up ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1 ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1 @@ -173,12 +168,13 @@ setup() { cleanup - create_ns me - create_ns peer - create_ns remote + setup_ns me peer remote + create_ns $me + create_ns $peer + create_ns $remote - IP="ip -netns me" - BRIDGE="bridge -netns me" + IP="ip -netns $me" + BRIDGE="bridge -netns $me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up @@ -190,24 +186,24 @@ setup() $IP addr add 172.16.2.1/24 dev veth3 $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad - $IP li set veth2 netns peer up - ip -netns peer addr add 172.16.1.2/24 dev veth2 - ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad + $IP li set veth2 netns $peer up + ip -netns $peer addr add 172.16.1.2/24 dev veth2 + ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad - $IP li set veth4 netns peer up - ip -netns peer addr add 172.16.2.2/24 dev veth4 - ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad + $IP li set veth4 netns $peer up + ip -netns $peer addr add 172.16.2.2/24 dev veth4 + ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad - ip -netns remote li add veth5 type veth peer name veth6 - ip -netns remote li set veth5 up - ip -netns remote addr add dev veth5 172.16.101.1/24 - ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad - ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2 - ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 + ip -netns $remote li add veth5 type veth peer name veth6 + ip -netns $remote li set veth5 up + ip -netns $remote addr add dev veth5 172.16.101.1/24 + ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad + ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2 + ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 - ip -netns remote li set veth6 netns peer up - ip -netns peer addr add dev veth6 172.16.101.2/24 - ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad + ip -netns $remote li set veth6 netns $peer up + ip -netns $peer addr add dev veth6 172.16.101.2/24 + ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad set +e } @@ -215,7 +211,7 @@ cleanup() { local ns - for ns in me peer remote; do + for ns in $me $peer $remote; do ip netns del ${ns} 2>/dev/null done } @@ -779,7 +775,7 @@ ipv6_grp_refs() run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" # create per-cpu dsts through nh 100 - run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" # remove nh 100 from the group to delete the route potentially leaving # a stale per-cpu dst which holds a reference to the nexthop's net @@ -805,7 +801,7 @@ ipv6_grp_refs() # if a reference was lost this command will hang because the net device # cannot be removed - timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1 # we can't cleanup if the command is hung trying to delete the netdev if [ $? -eq 137 ]; then @@ -1012,13 +1008,13 @@ ipv6_fcnal_runtime() log_test $? 0 "Route delete" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping with nexthop" run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 122 group 81/82" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - multipath" # @@ -1026,26 +1022,26 @@ ipv6_fcnal_runtime() # run_cmd "$IP -6 nexthop add id 83 blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP -6 nexthop replace id 83 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 81/82" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1128,15 +1124,15 @@ ipv6_fcnal_runtime() # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 - run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" + run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 93 group 91/92" run_cmd "$IP -6 ro add default nhid 91" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with default route and rpfilter" run_cmd "$IP -6 ro replace default nhid 93" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with multipath default route and rpfilter" # TO-DO: @@ -1216,11 +1212,11 @@ ipv6_torture() pid1=$! ipv6_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -1270,11 +1266,11 @@ ipv6_res_torture() pid1=$! ipv6_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 \ + ip netns exec $me mausezahn -6 veth1 \ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -1544,7 +1540,7 @@ ipv4_withv6_fcnal() local lladdr set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1" set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" @@ -1606,13 +1602,13 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 nhid 21" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Basic ping" run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" run_cmd "$IP nexthop add id 122 group 21/22" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multipath" run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" @@ -1623,7 +1619,7 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" run_cmd "$IP ro add default nhid 501" run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh first" # flip the order @@ -1632,7 +1628,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" run_cmd "$IP ro add default nhid 501 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh second" run_cmd "$IP nexthop delete nhid 501" @@ -1643,26 +1639,26 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop add id 23 blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP nexthop replace id 23 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 21/22" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1685,11 +1681,11 @@ ipv4_fcnal_runtime() # IPv4 with IPv6 # set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1" set +e run_cmd "$IP ro replace 172.16.101.1/32 nhid 24" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1713,11 +1709,11 @@ ipv4_fcnal_runtime() check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 route with IPv6 gateway" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1734,7 +1730,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1" run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 default route with IPv6 gateway" # @@ -1785,7 +1781,7 @@ sysctl_nexthop_compat_mode_check() local sysctlname="net.ipv4.nexthop_compat_mode" local lprefix=$1 - IPE="ip netns exec me" + IPE="ip netns exec $me" $IPE sysctl -q $sysctlname 2>&1 >/dev/null if [ $? -ne 0 ]; then @@ -1804,7 +1800,7 @@ sysctl_nexthop_compat_mode_set() local mode=$1 local lprefix=$2 - IPE="ip netns exec me" + IPE="ip netns exec $me" out=$($IPE sysctl -w $sysctlname=$mode) log_test $? 0 "$lprefix set compat mode - $mode" @@ -1988,11 +1984,11 @@ ipv4_torture() pid1=$! ipv4_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -2042,11 +2038,11 @@ ipv4_res_torture() pid1=$! ipv4_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 \ + ip netns exec $me mausezahn veth1 \ -B 172.16.101.2 -A 172.16.1.1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -2081,10 +2077,10 @@ basic() # create nh with linkdown device - fails $IP li set veth1 up - ip -netns peer li set veth2 down + ip -netns $peer li set veth2 down run_cmd "$IP nexthop add id 1 dev veth1" log_test $? 2 "Nexthop with device that is linkdown" - ip -netns peer li set veth2 up + ip -netns $peer li set veth2 up # device only run_cmd "$IP nexthop add id 1 dev veth1" @@ -2465,7 +2461,7 @@ fi for t in $TESTS do case $t in - none) IP="ip -netns peer"; setup; exit 0;; + none) IP="ip -netns $peer"; setup; exit 0;; *) setup; $t; cleanup;; esac done -- cgit v1.2.3 From 3a06833b2adc0a902f2469ad4ce41ccd64f1f3ab Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:53 +0800 Subject: selftests/net: convert fib-onlink-tests.sh to run it in unique namespace Remove PEER_CMD, which is not used in this test Here is the test result after conversion. ]# ./fib-onlink-tests.sh Error: ipv4: FIB table does not exist. Flush terminated Error: ipv6: FIB table does not exist. Flush terminated ######################################## Configuring interfaces ... TEST: Gateway resolves to wrong nexthop device - VRF [ OK ] Tests passed: 38 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-11-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib-onlink-tests.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index c287b90b8af8..ec2d6ceb1f08 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -3,6 +3,7 @@ # IPv4 and IPv6 onlink tests +source lib.sh PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} VERBOSE=0 @@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254 # mcast address MCAST6=ff02::1 - -PEER_NS=bart -PEER_CMD="ip netns exec ${PEER_NS}" VRF=lisa VRF_TABLE=1101 PBR_TABLE=101 @@ -176,8 +174,7 @@ setup() set -e # create namespace - ip netns add ${PEER_NS} - ip -netns ${PEER_NS} li set lo up + setup_ns PEER_NS # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} @@ -219,7 +216,7 @@ setup() cleanup() { # make sure we start from a clean slate - ip netns del ${PEER_NS} 2>/dev/null + cleanup_ns ${PEER_NS} 2>/dev/null for n in 1 3 5 7; do ip link del ${NETIFS[p${n}]} 2>/dev/null done -- cgit v1.2.3 From 6c0ee7b4d69d5545d28f81ceabf89a5b7d86d1dd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:54 +0800 Subject: selftests/net: convert fib_rule_tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./fib_rule_tests.sh TEST: rule6 check: oif redirect to table [ OK ] ... TEST: rule4 dsfield tcp connect (dsfield 0x07) [ OK ] Tests passed: 66 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-12-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 36 ++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 63c3eaec8d30..51157a5559b7 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -3,14 +3,9 @@ # This test is for checking IPv4 and IPv6 FIB rules API -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh ret=0 - PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -IP="ip -netns testns" -IP_PEER="ip -netns peerns" RTABLE=100 RTABLE_PEER=101 @@ -84,8 +79,8 @@ check_nettest() setup() { set -e - ip netns add testns - $IP link set dev lo up + setup_ns testns + IP="ip -netns $testns" $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -98,18 +93,19 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + cleanup_ns $testns } setup_peer() { set -e - ip netns add peerns + setup_ns peerns + IP_PEER="ip -netns $peerns" $IP_PEER link set dev lo up - ip link add name veth0 netns testns type veth \ - peer name veth1 netns peerns + ip link add name veth0 netns $testns type veth \ + peer name veth1 netns $peerns $IP link set dev veth0 up $IP_PEER link set dev veth1 up @@ -131,7 +127,7 @@ setup_peer() cleanup_peer() { $IP link del dev veth0 - ip netns del peerns + ip netns del $peerns } fib_check_iproute_support() @@ -270,11 +266,11 @@ fib_rule6_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). # The ECN bits shouldn't influence the result of the test. for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" - nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done @@ -337,11 +333,11 @@ fib_rule4_test() # need enable forwarding and disable rp_filter temporarily as all the # addresses are in the same subnet and egress device == ingress device. - ip netns exec testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -407,11 +403,11 @@ fib_rule4_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). # The ECN bits shouldn't influence the result of the test. for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" - nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done -- cgit v1.2.3 From f6fc5b949911efb758e13bc4a1a10c9a473b6254 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:55 +0800 Subject: selftests/net: convert fib_tests.sh to run it in unique namespace Here is the test result after conversion. # ./fib_tests.sh Single path route test Start point TEST: IPv4 fibmatch [ OK ] ... Fib6 garbage collection test TEST: ipv6 route garbage collection [ OK ] IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] IPv6 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] Tests passed: 225 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-13-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 184 +++++++++++++++---------------- 1 file changed, 87 insertions(+), 97 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 66d0db7a2614..b3ecccbbfcd2 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -3,10 +3,8 @@ # This test is for checking IPv4 and IPv6 FIB behavior in response to # different events. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ @@ -18,8 +16,6 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="$(which ip) -netns ns1" -NS_EXEC="$(which ip) netns exec ns1" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -55,11 +51,11 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns1 + IP="$(which ip) -netns $ns1" + NS_EXEC="$(which ip) netns exec $ns1" + ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -72,8 +68,7 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del ns1 &> /dev/null - ip netns del ns2 &> /dev/null + cleanup_ns $ns1 $ns2 } get_linklocal() @@ -448,28 +443,25 @@ fib_rp_filter_test() setup set -e - ip netns add ns2 - ip netns set ns2 auto - - ip -netns ns2 link set dev lo up + setup_ns ns2 $IP link add name veth1 type veth peer name veth2 - $IP link set dev veth2 netns ns2 + $IP link set dev veth2 netns $ns2 $IP address add 192.0.2.1/24 dev veth1 - ip -netns ns2 address add 192.0.2.1/24 dev veth2 + ip -netns $ns2 address add 192.0.2.1/24 dev veth2 $IP link set dev veth1 up - ip -netns ns2 link set dev veth2 up + ip -netns $ns2 link set dev veth2 up $IP link set dev lo address 52:54:00:6a:c7:5e $IP link set dev veth1 address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e # 1. (ns2) redirect lo's egress to veth2's egress - ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel - ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \ action mirred egress redirect dev veth2 - ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \ action mirred egress redirect dev veth2 # 2. (ns1) redirect veth1's ingress to lo's ingress @@ -487,24 +479,24 @@ fib_rp_filter_test() action mirred egress redirect dev veth1 # 4. (ns2) redirect veth2's ingress to lo's ingress - ip netns exec ns2 tc qdisc add dev veth2 ingress - ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev veth2 ingress + ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \ action mirred ingress redirect dev lo - ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \ action mirred ingress redirect dev lo $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup @@ -959,34 +951,32 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - ip netns add ns2 - ip netns set ns2 auto - ip -netns ns2 link set dev lo up - ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns2 + ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 netns ns2 up - $IP li set veth4 netns ns2 up - ip -netns ns2 li add dummy1 type dummy - ip -netns ns2 li set dummy1 up + $IP li set veth2 netns $ns2 up + $IP li set veth4 netns $ns2 up + ip -netns $ns2 li add dummy1 type dummy + ip -netns $ns2 li set dummy1 up $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 $IP addr add 172.16.103.1/24 dev veth3 - ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad - ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad - ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad - ip -netns ns2 addr add 172.16.101.2/24 dev veth2 - ip -netns ns2 addr add 172.16.103.2/24 dev veth4 - ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 + ip -netns $ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns $ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1 set +e } @@ -1238,7 +1228,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -1344,7 +1334,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1599,19 +1589,19 @@ ipv4_rt_replace() ipv4_local_rt_cache() { run_cmd "ip addr add 10.0.0.1/32 dev lo" - run_cmd "ip netns add test-ns" + run_cmd "setup_ns test-ns" run_cmd "ip link add veth-outside type veth peer name veth-inside" run_cmd "ip link add vrf-100 type vrf table 1100" run_cmd "ip link set veth-outside master vrf-100" - run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-inside netns $test-ns" run_cmd "ip link set veth-outside up" run_cmd "ip link set vrf-100 up" run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" - run_cmd "ip netns exec test-ns ip link set veth-inside up" - run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" - run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip netns exec $test-ns ip link set veth-inside up" + run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1" run_cmd "ip link delete vrf-100" # if we do not hang test is a success @@ -1841,7 +1831,7 @@ ipv4_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 - run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" @@ -2105,7 +2095,7 @@ ipv4_route_v6_gw_test() check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1" fi - run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1" log_test $rc 0 "Single path route with IPv6 gateway - ping" run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2" @@ -2196,7 +2186,7 @@ ipv4_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. @@ -2254,7 +2244,7 @@ ipv6_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. @@ -2423,37 +2413,37 @@ ipv4_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1" - run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2" - run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 route add 203.0.113.0/24 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. - local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2471,34 +2461,34 @@ ipv6_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1" - run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64 nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. - local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff -- cgit v1.2.3 From b795db185e3260c8022dbfd01c12a05dcfe51b7a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:56 +0800 Subject: selftests/net: convert fdb_flush.sh to run it in unique namespace Here is the test result after conversion. # ./fdb_flush.sh TEST: vx10: Expected 5 FDB entries, got 5 [ OK ] TEST: vx20: Expected 5 FDB entries, got 5 [ OK ] ... TEST: vx10: Expected 5 FDB entries, got 5 [ OK ] TEST: Test entries with dst 192.0.2.1 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-14-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fdb_flush.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index 90e7a29e0476..d5e3abb8658c 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -5,6 +5,8 @@ # Check that flush works as expected with all the supported arguments and verify # some combinations of arguments. +source lib.sh + FLUSH_BY_STATE_TESTS=" vxlan_test_flush_by_permanent vxlan_test_flush_by_nopermanent @@ -739,10 +741,9 @@ bridge_vxlan_test_flush() setup() { - IP="ip -netns ns1" - BRIDGE="bridge -netns ns1" - - ip netns add ns1 + setup_ns NS + IP="ip -netns ${NS}" + BRIDGE="bridge -netns ${NS}" $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT" $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT" @@ -759,7 +760,7 @@ cleanup() $IP link del dev vx20 $IP link del dev vx10 - ip netns del ns1 + cleanup_ns ${NS} } ################################################################################ -- cgit v1.2.3 From 542e893fbadc51caa38a7c4c2a8f8e822cdba2b1 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 14 Dec 2023 15:52:30 +0300 Subject: vsock/test: two tests to check credit update logic Both tests are almost same, only differs in two 'if' conditions, so implemented in a single function. Tests check, that credit update message is sent: 1) During setting SO_RCVLOWAT value of the socket. 2) When number of 'rx_bytes' become smaller than SO_RCVLOWAT value. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/testing/vsock/vsock_test.c | 175 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 01fa816868bc..66246d81d654 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1232,6 +1232,171 @@ static void test_double_bind_connect_client(const struct test_opts *opts) } } +#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) +/* This define is the same as in 'include/linux/virtio_vsock.h': + * it is used to decide when to send credit update message during + * reading from rx queue of a socket. Value and its usage in + * kernel is important for this test. + */ +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) + +static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opts) +{ + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte more than peer's buffer size. */ + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE + 1; + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until peer sets needed buffer size. */ + recv_byte(fd, 1, 0); + + if (send(fd, buf, buf_size, 0) != buf_size) { + perror("send failed"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_credit_update_test(const struct test_opts *opts, + bool low_rx_bytes_test) +{ + size_t recv_buf_size; + struct pollfd fds; + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &buf_size, sizeof(buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + if (low_rx_bytes_test) { + /* Set new SO_RCVLOWAT here. This enables sending credit + * update when number of bytes if our rx queue become < + * SO_RCVLOWAT value. + */ + recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + /* Send one dummy byte here, because 'setsockopt()' above also + * sends special packet which tells sender to update our buffer + * size. This 'send_byte()' will serialize such packet with data + * reads in a loop below. Sender starts transmission only when + * it receives this single byte. + */ + send_byte(fd, 1, 0); + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until there will be 128KB of data in rx queue. */ + while (1) { + ssize_t res; + + res = recv(fd, buf, buf_size, MSG_PEEK); + if (res == buf_size) + break; + + if (res <= 0) { + fprintf(stderr, "unexpected 'recv()' return: %zi\n", res); + exit(EXIT_FAILURE); + } + } + + /* There is 128KB of data in the socket's rx queue, dequeue first + * 64KB, credit update is sent if 'low_rx_bytes_test' == true. + * Otherwise, credit update is sent in 'if (!low_rx_bytes_test)'. + */ + recv_buf_size = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + recv_buf(fd, buf, recv_buf_size, 0, recv_buf_size); + + if (!low_rx_bytes_test) { + recv_buf_size++; + + /* Updating SO_RCVLOWAT will send credit update. */ + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = POLLIN | POLLRDNORM | POLLERR | + POLLRDHUP | POLLHUP; + + /* This 'poll()' will return once we receive last byte + * sent by client. + */ + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & (POLLIN | POLLRDNORM)) { + recv_buf(fd, buf, recv_buf_size, MSG_DONTWAIT, recv_buf_size); + } else { + /* These flags must be set, as there is at + * least 64KB of data ready to read. + */ + fprintf(stderr, "POLLIN | POLLRDNORM expected\n"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_cred_upd_on_low_rx_bytes(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, true); +} + +static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, false); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1342,6 +1507,16 @@ static struct test_case test_cases[] = { .run_client = test_double_bind_connect_client, .run_server = test_double_bind_connect_server, }, + { + .name = "SOCK_STREAM virtio credit update + SO_RCVLOWAT", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_set_rcvlowat, + }, + { + .name = "SOCK_STREAM virtio credit update + low rx_bytes", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_low_rx_bytes, + }, {}, }; -- cgit v1.2.3 From 18872ba8cd2406ffa835f9f6276e47ed86fbb5d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 10:49:01 +0000 Subject: selftests/net: optmem_max became per netns /proc/sys/net/core/optmem_max is now per netns, change two tests that were saving/changing/restoring its value on the parent netns. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/io_uring_zerocopy_tx.sh | 9 ++++----- tools/testing/selftests/net/msg_zerocopy.sh | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh index 9ac4456d48fc..123439545013 100755 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -76,23 +76,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 825ffec85cea..89c22f5320e0 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -70,23 +70,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" -- cgit v1.2.3 From 00e7f29d9b895cbee58b7071900dd52ed6dcec1e Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 14 Dec 2023 14:50:29 +0100 Subject: selftests: forwarding: ethtool_rmon: Add histogram counter test Validate the operation of rx and tx histogram counters, if supported by the interface, by sending batches of packets targeted for each bucket. Signed-off-by: Tobias Waldekranz Tested-by: Vladimir Oltean Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/Makefile | 1 + .../selftests/net/forwarding/ethtool_rmon.sh | 143 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 9 ++ 3 files changed, 153 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/ethtool_rmon.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index df593b7b3e6b..452693514be4 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -17,6 +17,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ dual_vxlan_bridge.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ + ethtool_rmon.sh \ ethtool.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh new file mode 100755 index 000000000000..41a34a61f763 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +source lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_skip "$if does not support the required MTU for $step" + return + fi + done + + if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_skip "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8f6ca458af9a..e3740163c384 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -146,6 +146,15 @@ check_ethtool_mm_support() fi } +check_ethtool_counter_group_support() +{ + ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing standard counter group support" + exit $ksft_skip + fi +} + check_locked_port_support() { if ! bridge -d link show | grep -q " locked"; then -- cgit v1.2.3 From fddd9e3e4e716e3c484413b95579b40a7b6dbe41 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Dec 2023 17:59:38 -0800 Subject: tools/testing/nvdimm: Add compile-test coverage for ndtest Greg lamented: "Ick, sorry about that, obviously this test isn't actually built by any bots :(" A quick and dirty way to prevent this problem going forward is to always compile ndtest.ko whenever nfit_test is built. While this still does not expose the test code to any of the known build bots, it at least makes it the case that anyone that runs the x86 tests also compiles the powerpc test. I.e. the Intel NVDIMM maintainers are less likely to fall into this hole in the future. Link: http://lore.kernel.org/r/2023112729-aids-drainable-5744@gregkh Cc: Greg KH Cc: Yi Zhang Cc: Dave Jiang Cc: Ira Weiny Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/170191437889.426826.15528612879942432918.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/nvdimm/test/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild index 197bcb2b7f35..003d48f5f24f 100644 --- a/tools/testing/nvdimm/test/Kbuild +++ b/tools/testing/nvdimm/test/Kbuild @@ -7,6 +7,7 @@ obj-m += nfit_test_iomap.o ifeq ($(CONFIG_ACPI_NFIT),m) nfit_test-y := nfit.o + obj-m += ndtest.o else nfit_test-y := ndtest.o endif -- cgit v1.2.3 From 0d83786f5661154d015b498a3d23d4c37e30f6ef Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:06 +0800 Subject: selftests/bpf: Add test for abnormal cnt during multi-uprobe attachment If an abnormally huge cnt is used for multi-uprobes attachment, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 406 at mm/util.c:632 kvmalloc_node+0xd9/0xe0 Modules linked in: bpf_testmod(O) CPU: 7 PID: 406 Comm: test_progs Tainted: G ...... 6.7.0-rc3+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:kvmalloc_node+0xd9/0xe0 ...... Call Trace: ? __warn+0x89/0x150 ? kvmalloc_node+0xd9/0xe0 bpf_uprobe_multi_link_attach+0x14a/0x480 __sys_bpf+0x14a9/0x2bc0 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ...... ---[ end trace 0000000000000000 ]--- So add a test to ensure the warning is fixed. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-4-houtao@huaweicloud.com --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 32 +++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index ece260cf2c0b..07a009f95e85 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -234,6 +234,34 @@ static void test_attach_api_syms(void) test_attach_api("/proc/self/exe", NULL, &opts); } +static void test_attach_api_fails(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *path = "/proc/self/exe"; + struct uprobe_multi *skel = NULL; + unsigned long offset = 0; + int link_fd = -1; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + /* abnormal cnt */ + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = &offset; + opts.uprobe_multi.cnt = INT_MAX; + link_fd = bpf_link_create(bpf_program__fd(skel->progs.uprobe), 0, + BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) + goto cleanup; +cleanup: + if (link_fd >= 0) + close(link_fd); + uprobe_multi__destroy(skel); +} + static void __test_link_api(struct child *child) { int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; @@ -311,7 +339,7 @@ cleanup: free(offsets); } -void test_link_api(void) +static void test_link_api(void) { struct child *child; @@ -412,4 +440,6 @@ void test_uprobe_multi_test(void) test_bench_attach_uprobe(); if (test__start_subtest("bench_usdt")) test_bench_attach_usdt(); + if (test__start_subtest("attach_api_fails")) + test_attach_api_fails(); } -- cgit v1.2.3 From 00cdcd2900bdb9190d1e75438b39cef74cd99232 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:07 +0800 Subject: selftests/bpf: Don't use libbpf_get_error() in kprobe_multi_test Since libbpf v1.0, libbpf doesn't return error code embedded into the pointer iteself, libbpf_get_error() is deprecated and it is basically the same as using -errno directly. So replace the invocations of libbpf_get_error() by -errno in kprobe_multi_test. For libbpf_get_error() in test_attach_api_fails(), saving -errno before invoking ASSERT_xx() macros just in case that errno is overwritten by these macros. However, the invocation of libbpf_get_error() in get_syms() should be kept intact, because hashmap__new() still returns a pointer with embedded error code. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-5-houtao@huaweicloud.com --- .../testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 4041cfa670eb..6079611b5df4 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -222,6 +222,7 @@ static void test_attach_api_fails(void) "bpf_fentry_test2", }; __u64 cookies[2]; + int saved_error; addrs[0] = ksym_get_addr("bpf_fentry_test1"); addrs[1] = ksym_get_addr("bpf_fentry_test2"); @@ -238,10 +239,11 @@ static void test_attach_api_fails(void) /* fail_1 - pattern and opts NULL */ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error")) goto cleanup; /* fail_2 - both addrs and syms set */ @@ -252,10 +254,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error")) goto cleanup; /* fail_3 - pattern and addrs set */ @@ -266,10 +269,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error")) goto cleanup; /* fail_4 - pattern and cnt set */ @@ -280,10 +284,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error")) goto cleanup; /* fail_5 - pattern and cookies */ @@ -294,10 +299,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) goto cleanup; cleanup: -- cgit v1.2.3 From 1467affd16b236fc86e1b8ec5eaa147e104cd2a6 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:08 +0800 Subject: selftests/bpf: Add test for abnormal cnt during multi-kprobe attachment If an abnormally huge cnt is used for multi-kprobes attachment, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 392 at mm/util.c:632 kvmalloc_node+0xd9/0xe0 Modules linked in: bpf_testmod(O) CPU: 1 PID: 392 Comm: test_progs Tainted: G ...... 6.7.0-rc3+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:kvmalloc_node+0xd9/0xe0 ? __warn+0x89/0x150 ? kvmalloc_node+0xd9/0xe0 bpf_kprobe_multi_link_attach+0x87/0x670 __sys_bpf+0x2a28/0x2bc0 __x64_sys_bpf+0x1a/0x30 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7fbe067f0e0d ...... ---[ end trace 0000000000000000 ]--- So add a test to ensure the warning is fixed. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-6-houtao@huaweicloud.com --- .../testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 6079611b5df4..05000810e28e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -306,6 +306,21 @@ static void test_attach_api_fails(void) if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) goto cleanup; + /* fail_6 - abnormal cnt */ + opts.addrs = (const unsigned long *) addrs; + opts.syms = NULL; + opts.cnt = INT_MAX; + opts.cookies = NULL; + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + NULL, &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_6")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error")) + goto cleanup; + cleanup: bpf_link__destroy(link); kprobe_multi__destroy(skel); -- cgit v1.2.3 From ae1914174a63a558113e80d24ccac2773f9f7b2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:40:57 -0700 Subject: cred: get rid of CONFIG_DEBUG_CREDENTIALS This code is rarely (never?) enabled by distros, and it hasn't caught anything in decades. Let's kill off this legacy debug code. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/powerpc/configs/skiroot_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - fs/nfsd/auth.c | 4 - fs/nfsd/nfssvc.c | 1 - fs/nfsd/vfs.c | 9 +- fs/open.c | 3 - include/linux/cred.h | 50 ------- kernel/cred.c | 231 +++--------------------------- kernel/exit.c | 3 - lib/Kconfig.debug | 15 -- net/sunrpc/auth.c | 3 - security/selinux/hooks.c | 6 - tools/objtool/noreturns.h | 1 - tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 15 files changed, 17 insertions(+), 313 deletions(-) (limited to 'tools') diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..dd0608629310 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -834,7 +834,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index fdf2aad73470..e6beaaf4f170 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int i; int flags = nfsexp_flags(rqstp, exp); - validate_process_creds(); - /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); new = prepare_creds(); @@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - validate_process_creds(); put_cred(override_creds(new)); put_cred(new); - validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fe61d9bbcc1f..5014ab87d313 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -955,7 +955,6 @@ nfsd(void *vrqstp) rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); - validate_process_creds(); } atomic_dec(&nfsdstats.th_cnt); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fbbea7498f02..e01e4e2acbd9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -901,7 +901,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int host_err; bool retried = false; - validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -926,7 +925,6 @@ retry: } err = nfserrno(host_err); } - validate_process_creds(); return err; } @@ -943,12 +941,7 @@ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { - int err; - - validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); - validate_process_creds(); - return err; + return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/open.c b/fs/open.c index 02dc608d40d8..3494a9cd8046 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1088,8 +1088,6 @@ struct file *dentry_open(const struct path *path, int flags, int error; struct file *f; - validate_creds(cred); - /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); @@ -1128,7 +1126,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *f; int error; - validate_creds(cred); f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; diff --git a/include/linux/cred.h b/include/linux/cred.h index a3383f8efb8f..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -110,13 +110,6 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) */ struct cred { atomic_long_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -264,7 +217,6 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred_many(nonconst_cred, nr); } @@ -290,7 +242,6 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) return NULL; if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -312,7 +263,6 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } diff --git a/kernel/cred.c b/kernel/cred.c index 4a6cd0f0fef5..c033a201c808 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; */ struct cred init_cred = { .usage = ATOMIC_INIT(4), -#ifdef CONFIG_DEBUG_CREDENTIALS - .subscribers = ATOMIC_INIT(2), - .magic = CRED_MAGIC, -#endif .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, .suid = GLOBAL_ROOT_UID, @@ -66,31 +62,6 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; -static inline void set_cred_subscribers(struct cred *cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_set(&cred->subscribers, n); -#endif -} - -static inline int read_cred_subscribers(const struct cred *cred) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - return atomic_read(&cred->subscribers); -#else - return 0; -#endif -} - -static inline void alter_cred_subscribers(const struct cred *_cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - struct cred *cred = (struct cred *) _cred; - - atomic_add(n, &cred->subscribers); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu) kdebug("put_cred_rcu(%p)", cred); -#ifdef CONFIG_DEBUG_CREDENTIALS - if (cred->magic != CRED_MAGIC_DEAD || - atomic_long_read(&cred->usage) != 0 || - read_cred_subscribers(cred) != 0) - panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %ld, subscr %d\n", - cred, cred->magic, cred->put_addr, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); -#else if (atomic_long_read(&cred->usage) != 0) panic("CRED: put_cred_rcu() sees %p with usage %ld\n", cred, atomic_long_read(&cred->usage)); -#endif security_cred_free(cred); key_put(cred->session_keyring); @@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%ld,%d})", cred, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); + kdebug("__put_cred(%p{%ld})", cred, + atomic_long_read(&cred->usage)); BUG_ON(atomic_long_read(&cred->usage) != 0); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(cred) != 0); - cred->magic = CRED_MAGIC_DEAD; - cred->put_addr = __builtin_return_address(0); -#endif BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); @@ -164,9 +118,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *real_cred, *cred; - kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); + kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage)); real_cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; @@ -174,15 +127,10 @@ void exit_creds(struct task_struct *tsk) cred = (struct cred *) tsk->cred; tsk->cred = NULL; - validate_creds(cred); if (real_cred == cred) { - alter_cred_subscribers(cred, -2); put_cred_many(cred, 2); } else { - validate_creds(real_cred); - alter_cred_subscribers(real_cred, -1); put_cred(real_cred); - alter_cred_subscribers(cred, -1); put_cred(cred); } @@ -231,9 +179,6 @@ struct cred *cred_alloc_blank(void) return NULL; atomic_long_set(&new->usage, 1); -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -264,8 +209,6 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - validate_process_creds(); - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; @@ -277,7 +220,6 @@ struct cred *prepare_creds(void) new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -300,7 +242,6 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; - validate_creds(new); return new; error: @@ -362,10 +303,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) clone_flags & CLONE_THREAD ) { p->real_cred = get_cred_many(p->cred, 2); - alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%ld,%d})", - p->cred, atomic_long_read(&p->cred->usage), - read_cred_subscribers(p->cred)); + kdebug("share_creds(%p{%ld})", + p->cred, atomic_long_read(&p->cred->usage)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -404,8 +343,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->cred = p->real_cred = get_cred(new); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(new, 2); - validate_creds(new); return 0; error_put: @@ -457,16 +394,10 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("commit_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); BUG_ON(task->cred != old); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(old) < 2); - validate_creds(old); - validate_creds(new); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -502,14 +433,12 @@ int commit_creds(struct cred *new) * RLIMIT_NPROC limits on user->processes have already been checked * in set_user(). */ - alter_cred_subscribers(new, 2); if (new->user != old->user || new->user_ns != old->user_ns) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(old, -2); /* send notifications */ if (!uid_eq(new->uid, old->uid) || @@ -539,13 +468,9 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("abort_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(new) != 0); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } @@ -562,12 +487,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); - - validate_creds(old); - validate_creds(new); + kdebug("override_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); /* * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. @@ -576,18 +497,12 @@ const struct cred *override_creds(const struct cred *new) * we are only installing the cred into the thread-synchronous * '->cred' pointer, not the '->real_cred' pointer that is * visible to other threads under RCU. - * - * Also note that we did validate_creds() manually, not depending - * on the validation in 'get_cred()'. */ get_new_cred((struct cred *)new); - alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); - alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%ld,%d}", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("override_creds() = %p{%ld}", old, + atomic_long_read(&old->usage)); return old; } EXPORT_SYMBOL(override_creds); @@ -603,15 +518,10 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%ld,%d})", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("revert_creds(%p{%ld})", old, + atomic_long_read(&old->usage)); - validate_creds(old); - validate_creds(override); - alter_cred_subscribers(old, 1); rcu_assign_pointer(current->cred, old); - alter_cred_subscribers(override, -1); put_cred(override); } EXPORT_SYMBOL(revert_creds); @@ -731,12 +641,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); - validate_creds(old); *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); @@ -760,7 +668,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) goto error; put_cred(old); - validate_creds(new); return new; error: @@ -825,109 +732,3 @@ int set_create_files_as(struct cred *new, struct inode *inode) return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); - -#ifdef CONFIG_DEBUG_CREDENTIALS - -bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - return false; -} -EXPORT_SYMBOL(creds_are_invalid); - -/* - * dump invalid credentials - */ -static void dump_invalid_creds(const struct cred *cred, const char *label, - const struct task_struct *tsk) -{ - pr_err("%s credentials: %p %s%s%s\n", - label, cred, - cred == &init_cred ? "[init]" : "", - cred == tsk->real_cred ? "[real]" : "", - cred == tsk->cred ? "[eff]" : ""); - pr_err("->magic=%x, put_addr=%p\n", - cred->magic, cred->put_addr); - pr_err("->usage=%ld, subscr=%d\n", - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); - pr_err("->*uid = { %d,%d,%d,%d }\n", - from_kuid_munged(&init_user_ns, cred->uid), - from_kuid_munged(&init_user_ns, cred->euid), - from_kuid_munged(&init_user_ns, cred->suid), - from_kuid_munged(&init_user_ns, cred->fsuid)); - pr_err("->*gid = { %d,%d,%d,%d }\n", - from_kgid_munged(&init_user_ns, cred->gid), - from_kgid_munged(&init_user_ns, cred->egid), - from_kgid_munged(&init_user_ns, cred->sgid), - from_kgid_munged(&init_user_ns, cred->fsgid)); -#ifdef CONFIG_SECURITY - pr_err("->security is %p\n", cred->security); - if ((unsigned long) cred->security >= PAGE_SIZE && - (((unsigned long) cred->security & 0xffffff00) != - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))) - pr_err("->security {%x, %x}\n", - ((u32*)cred->security)[0], - ((u32*)cred->security)[1]); -#endif -} - -/* - * report use of invalid credentials - */ -void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) -{ - pr_err("Invalid credentials\n"); - pr_err("At %s:%u\n", file, line); - dump_invalid_creds(cred, "Specified", current); - BUG(); -} -EXPORT_SYMBOL(__invalid_creds); - -/* - * check the credentials on a process - */ -void __validate_process_creds(struct task_struct *tsk, - const char *file, unsigned line) -{ - if (tsk->cred == tsk->real_cred) { - if (unlikely(read_cred_subscribers(tsk->cred) < 2 || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } else { - if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 || - read_cred_subscribers(tsk->cred) < 1 || - creds_are_invalid(tsk->real_cred) || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } - return; - -invalid_creds: - pr_err("Invalid process credentials\n"); - pr_err("At %s:%u\n", file, line); - - dump_invalid_creds(tsk->real_cred, "Real", tsk); - if (tsk->cred != tsk->real_cred) - dump_invalid_creds(tsk->cred, "Effective", tsk); - else - pr_err("Effective creds == Real creds\n"); - BUG(); -} -EXPORT_SYMBOL(__validate_process_creds); - -/* - * check creds for do_exit() - */ -void validate_creds_for_do_exit(struct task_struct *tsk) -{ - kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", - tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); - - __validate_process_creds(tsk, __FILE__, __LINE__); -} - -#endif /* CONFIG_DEBUG_CREDENTIALS */ diff --git a/kernel/exit.c b/kernel/exit.c index ee9f43bed49a..aedc0832c9f4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -824,8 +824,6 @@ void __noreturn do_exit(long code) ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); - validate_creds_for_do_exit(tsk); - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -909,7 +907,6 @@ void __noreturn do_exit(long code) if (tsk->task_frag.page) put_page(tsk->task_frag.page); - validate_creds_for_do_exit(tsk); exit_task_stack_account(tsk); check_stack_usage(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cc7d53d9dc01..4405f81248fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1739,21 +1739,6 @@ config DEBUG_MAPLE_TREE endmenu -config DEBUG_CREDENTIALS - bool "Debug credential management" - depends on DEBUG_KERNEL - help - Enable this to turn on some debug checking for credential - management. The additional code keeps track of the number of - pointers from task_structs to any given cred struct, and checks to - see that this number never exceeds the usage count of the cred - struct. - - Furthermore, if SELinux is enabled, this also checks that the - security pointer in the cred struct is never seen to be invalid. - - If unsure, say N. - source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index feda711c6b7b..340b2bbbb2dd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1660,8 +1660,6 @@ static int inode_has_perm(const struct cred *cred, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -3056,8 +3054,6 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; sid = cred_sid(cred); @@ -3101,8 +3097,6 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 649ebdef9c3f..1685d7ea6a9f 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -6,7 +6,6 @@ * * Yes, this is unfortunate. A better solution is in the works. */ -NORETURN(__invalid_creds) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..49a29dbc1910 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -50,7 +50,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y -- cgit v1.2.3 From 42d45c45624a098a9fdc477c7a8b86167f948c77 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 15 Dec 2023 16:28:25 -0800 Subject: selftests/bpf: Temporarily disable dummy_struct_ops test on s390 Temporarily disable dummy_struct_ops test on s390. The breakage is likely due to commit 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI"). Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 1a63996c0304..d27aa42d11a4 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,5 +1,7 @@ # TEMPORARY # Alphabetical order +dummy_st_ops/dummy_init_ret_value +dummy_st_ops/dummy_init_ptr_arg exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -- cgit v1.2.3 From cfbab37b3da094579b8f7492e4df8a8a4c8c41b0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:15 +0000 Subject: selftests/net: Add TCP-AO library Provide functions to create selftests dedicated to TCP-AO. They can run in parallel, as they use temporary net namespaces. They can be very specific to the feature being tested. This will allow to create a lot of TCP-AO tests, without complicating one binary with many --options and to create scenarios, that are hard to put in bash script that uses one binary. Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/tcp_ao/.gitignore | 2 + tools/testing/selftests/net/tcp_ao/Makefile | 45 ++ tools/testing/selftests/net/tcp_ao/connect.c | 90 ++++ tools/testing/selftests/net/tcp_ao/lib/aolib.h | 605 +++++++++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/kconfig.c | 148 ++++++ tools/testing/selftests/net/tcp_ao/lib/netlink.c | 415 ++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/proc.c | 273 ++++++++++ tools/testing/selftests/net/tcp_ao/lib/repair.c | 254 ++++++++++ tools/testing/selftests/net/tcp_ao/lib/setup.c | 342 +++++++++++++ tools/testing/selftests/net/tcp_ao/lib/sock.c | 592 ++++++++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/utils.c | 30 ++ 12 files changed, 2797 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/.gitignore create mode 100644 tools/testing/selftests/net/tcp_ao/Makefile create mode 100644 tools/testing/selftests/net/tcp_ao/connect.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/aolib.h create mode 100644 tools/testing/selftests/net/tcp_ao/lib/kconfig.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/netlink.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/proc.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/repair.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/setup.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/sock.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/utils.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..f0c854d6511c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -58,6 +58,7 @@ TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch +TARGETS += net/tcp_ao TARGETS += netfilter TARGETS += nsfs TARGETS += perf_events diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore new file mode 100644 index 000000000000..e8bb81b715b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/.gitignore @@ -0,0 +1,2 @@ +*_ipv4 +*_ipv6 diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile new file mode 100644 index 000000000000..62425b9fb73c --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_BOTH_AF := connect + +TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) +TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) + +TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) + +top_srcdir := ../../../../.. +KSFT_KHDR_INSTALL := 1 +include ../../lib.mk + +HOSTAR ?= ar + +# Drop it on port to linux/master with commit 8ce72dc32578 +.DEFAULT_GOAL := all + +LIBDIR := $(OUTPUT)/lib +LIB := $(LIBDIR)/libaotst.a +LDLIBS += $(LIB) -pthread +LIBDEPS := lib/aolib.h Makefile + +CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -I ../../../../../usr/include/ -iquote $(LIBDIR) +CFLAGS += -I ../../../../include/ + +# Library +LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) +EXTRA_CLEAN += $(LIBOBJ) $(LIB) + +$(LIB): $(LIBOBJ) + $(HOSTAR) rcs $@ $^ + +$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c + +$(TEST_GEN_PROGS): $(LIB) + +$(OUTPUT)/%_ipv4: %.c + $(LINK.c) $^ $(LDLIBS) -o $@ + +$(OUTPUT)/%_ipv6: %.c + $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ + diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c new file mode 100644 index 000000000000..81653b47f303 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +static void *server_fn(void *arg) +{ + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); + + bytes = test_server_run(sk, 0, 0); + + test_fail("server served: %zd", bytes); + return NULL; +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + uint64_t before_aogood, after_aogood; + const size_t nr_packets = 20; + struct netstat *ns_before, *ns_after; + struct tcp_ao_counters ao1, ao2; + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + return NULL; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + netstat_print_diff(ns_before, ns_after); + netstat_free(ns_before); + netstat_free(ns_after); + + if (nr_packets > (after_aogood - before_aogood)) { + test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + nr_packets, after_aogood, before_aogood); + return NULL; + } + if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + return NULL; + + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + before_aogood, ao1.ao_info_pkt_good, + ao1.key_cnts[0].pkt_good, + after_aogood, ao2.ao_info_pkt_good, + ao2.key_cnts[0].pkt_good, + nr_packets); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(1, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h new file mode 100644 index 000000000000..fbc7f6111815 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP-AO selftest library. Provides helpers to unshare network + * namespaces, create veth, assign ip addresses, set routes, + * manipulate socket options, read network counter and etc. + * Author: Dmitry Safonov + */ +#ifndef _AOLIB_H_ +#define _AOLIB_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../../../include/linux/stringify.h" +#include "../../../../../include/linux/bits.h" + +#ifndef SOL_TCP +/* can't include as including */ +# define SOL_TCP 6 /* TCP level */ +#endif + +/* Working around ksft, see the comment in lib/setup.c */ +extern void __test_msg(const char *buf); +extern void __test_ok(const char *buf); +extern void __test_fail(const char *buf); +extern void __test_xfail(const char *buf); +extern void __test_error(const char *buf); +extern void __test_skip(const char *buf); + +__attribute__((__format__(__printf__, 2, 3))) +static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +{ +#define TEST_MSG_BUFFER_SIZE 4096 + char buf[TEST_MSG_BUFFER_SIZE]; + va_list arg; + + va_start(arg, fmt); + vsnprintf(buf, sizeof(buf), fmt, arg); + va_end(arg); + fn(buf); +} + +#define test_print(fmt, ...) \ + __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), \ + __FILE__, __LINE__, ##__VA_ARGS__) + +#define test_ok(fmt, ...) \ + __test_print(__test_ok, fmt "\n", ##__VA_ARGS__) +#define test_skip(fmt, ...) \ + __test_print(__test_skip, fmt "\n", ##__VA_ARGS__) +#define test_xfail(fmt, ...) \ + __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__) + +#define test_fail(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \ + else \ + __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \ + test_failed(); \ +} while (0) + +#define KSFT_FAIL 1 +#define test_error(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + else \ + __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + exit(KSFT_FAIL); \ +} while (0) + +enum test_fault { + FAULT_TIMEOUT = 1, + FAULT_KEYREJECT, + FAULT_PREINSTALL_AO, + FAULT_PREINSTALL_MD5, + FAULT_POSTINSTALL, + FAULT_BUSY, + FAULT_CURRNEXT, + FAULT_FIXME, +}; +typedef enum test_fault fault_t; + +enum test_needs_kconfig { + KCONFIG_NET_NS = 0, /* required */ + KCONFIG_VETH, /* required */ + KCONFIG_TCP_AO, /* required */ + KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ + KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + __KCONFIG_LAST__ +}; +extern bool kernel_config_has(enum test_needs_kconfig k); +extern const char *tests_skip_reason[__KCONFIG_LAST__]; +static inline bool should_skip_test(const char *tst_name, + enum test_needs_kconfig k) +{ + if (kernel_config_has(k)) + return false; + test_skip("%s: %s", tst_name, tests_skip_reason[k]); + return true; +} + +union tcp_addr { + struct in_addr a4; + struct in6_addr a6; +}; + +typedef void *(*thread_fn)(void *); +extern void test_failed(void); +extern void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2); + +static inline void test_init2(unsigned int ntests, + thread_fn peer1, thread_fn peer2, + int family, unsigned int prefix, + const char *addr1, const char *addr2) +{ + union tcp_addr taddr1, taddr2; + + if (inet_pton(family, addr1, &taddr1) != 1) + test_error("Can't convert ip address %s", addr1); + if (inet_pton(family, addr2, &taddr2) != 1) + test_error("Can't convert ip address %s", addr2); + + __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); +} +extern void test_add_destructor(void (*d)(void)); + +/* To adjust optmem socket limit, approximately estimate a number, + * that is bigger than sizeof(struct tcp_ao_key). + */ +#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300 + +extern void test_set_optmem(size_t value); +extern size_t test_get_optmem(void); + +extern const struct sockaddr_in6 addr_any6; +extern const struct sockaddr_in addr_any4; + +#ifdef IPV6_TEST +# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "2001:db8:253::1" +# define TEST_SERVER_IP "2001:db8:254::1" +# define TEST_NETWORK "2001::" +# define TEST_PREFIX 128 +# define TEST_FAMILY AF_INET6 +# define SOCKADDR_ANY addr_any6 +# define sockaddr_af struct sockaddr_in6 +#else +# define __TEST_CLIENT_IP(n) ("10.0." __stringify(n) ".1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "10.0.253.1" +# define TEST_SERVER_IP "10.0.254.1" +# define TEST_NETWORK "10.0.0.0" +# define TEST_PREFIX 32 +# define TEST_FAMILY AF_INET +# define SOCKADDR_ANY addr_any4 +# define sockaddr_af struct sockaddr_in +#endif + +static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n) +{ + union tcp_addr ret = net; + +#ifdef IPV6_TEST + ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1)); + ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1)); +#else + ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n); +#endif + + return ret; +} + +static inline void tcp_addr_to_sockaddr_in(void *dest, + const union tcp_addr *src, + unsigned int port) +{ + sockaddr_af *out = dest; + + memset(out, 0, sizeof(*out)); +#ifdef IPV6_TEST + out->sin6_family = AF_INET6; + out->sin6_port = port; + out->sin6_addr = src->a6; +#else + out->sin_family = AF_INET; + out->sin_port = port; + out->sin_addr = src->a4; +#endif +} + +static inline void test_init(unsigned int ntests, + thread_fn peer1, thread_fn peer2) +{ + test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX, + TEST_SERVER_IP, TEST_CLIENT_IP); +} +extern void synchronize_threads(void); +extern void switch_ns(int fd); + +extern __thread union tcp_addr this_ip_addr; +extern __thread union tcp_addr this_ip_dest; +extern int test_family; + +extern void randomize_buffer(void *buf, size_t buflen); +extern int open_netns(void); +extern int unshare_open_netns(void); +extern const char veth_name[]; +extern int add_veth(const char *name, int nsfda, int nsfdb); +extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd); +extern int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix); +extern int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst); +extern int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, + uint8_t vrf); +extern int link_set_up(const char *intf); + +extern const unsigned int test_server_port; +extern int test_wait_fd(int sk, time_t sec, bool write); +extern int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout); +extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); + +static inline int test_listen_socket(const union tcp_addr taddr, + unsigned int port, int backlog) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_listen_socket(backlog, (void *)&addr, sizeof(addr)); +} + +/* + * In order for selftests to work under CONFIG_CRYPTO_FIPS=y, + * the password should be loger than 14 bytes, see hmac_setkey() + */ +#define TEST_TCP_AO_MINKEYLEN 14 +#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure." + +#ifndef DEFAULT_TEST_ALGO +#define DEFAULT_TEST_ALGO "cmac(aes128)" +#endif + +#ifdef IPV6_TEST +#define DEFAULT_TEST_PREFIX 128 +#else +#define DEFAULT_TEST_PREFIX 32 +#endif + +/* + * Timeout on syscalls where failure is not expected. + * You may want to rise it if the test machine is very busy. + */ +#ifndef TEST_TIMEOUT_SEC +#define TEST_TIMEOUT_SEC 5 +#endif + +/* + * Timeout on connect() where a failure is expected. + * If set to 0 - kernel will try to retransmit SYN number of times, set in + * /proc/sys/net/ipv4/tcp_syn_retries + * By default set to 1 to make tests pass faster on non-busy machine. + */ +#ifndef TEST_RETRANSMIT_SEC +#define TEST_RETRANSMIT_SEC 1 +#endif + +static inline int _test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port, time_t timeout) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_connect_socket(sk, veth_name, + (void *)&addr, sizeof(addr), timeout); +} + +static inline int test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port) +{ + return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); +} + +extern int __test_set_md5(int sk, void *addr, size_t addr_sz, + uint8_t prefix, int vrf, const char *password); +static inline int test_set_md5(int sk, const union tcp_addr in_addr, + uint8_t prefix, int vrf, const char *password) +{ + sockaddr_af addr; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + tcp_addr_to_sockaddr_in(&addr, &in_addr, 0); + return __test_set_md5(sk, (void *)&addr, sizeof(addr), + prefix, vrf, password); +} + +extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key); + +static inline int test_prepare_key(struct tcp_ao_add *ao, + const char *alg, union tcp_addr taddr, + bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, 0); + return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr), + set_current, set_rnext, prefix, vrf, sndid, rcvid, + maclen, keyflags, keylen, key); +} + +static inline int test_prepare_def_key(struct tcp_ao_add *ao, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid) +{ + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, vrf, sndid, rcvid, 0, keyflags, + strlen(key), key); +} + +extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, + uint8_t prefix, uint8_t sndid, uint8_t rcvid); +extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out); +extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in); +extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b); +extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b); + +static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key) +{ + struct tcp_ao_getsockopt key2 = {}; + int err; + + err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr), + key->prefix, key->sndid, key->rcvid); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt(key, &key2); +} + +static inline int test_add_key_vrf(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t vrf, uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + vrf, sndid, rcvid); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static inline int test_add_key(int sk, const char *key, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid); +} + +static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao) +{ + struct tcp_ao_info_opt ao2 = {}; + int err; + + err = test_get_ao_info(sk, &ao2); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt_ao(ao, &ao2); +} + +static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) +{ + struct tcp_ao_info_opt ao = {}; + int err; + + err = test_get_ao_info(sk, &ao); + /* Maybe ao_info wasn't allocated yet */ + if (err && err != -ENOENT) + return err; + + ao.ao_required = !!ao_required; + ao.accept_icmps = !!accept_icmps; + err = test_set_ao_info(sk, &ao); + if (err) + return err; + + return test_verify_socket_ao(sk, &ao); +} + +extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); +extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec); + +struct tcp_ao_key_counters { + uint8_t sndid; + uint8_t rcvid; + uint64_t pkt_good; + uint64_t pkt_bad; +}; + +struct tcp_ao_counters { + /* per-netns */ + uint64_t netns_ao_good; + uint64_t netns_ao_bad; + uint64_t netns_ao_key_not_found; + uint64_t netns_ao_required; + uint64_t netns_ao_dropped_icmp; + /* per-socket */ + uint64_t ao_info_pkt_good; + uint64_t ao_info_pkt_bad; + uint64_t ao_info_pkt_key_not_found; + uint64_t ao_info_pkt_ao_required; + uint64_t ao_info_pkt_dropped_icmp; + /* per-key */ + size_t nr_keys; + struct tcp_ao_key_counters *key_cnts; +}; +extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +#define TEST_CNT_KEY_GOOD BIT(0) +#define TEST_CNT_KEY_BAD BIT(1) +#define TEST_CNT_SOCK_GOOD BIT(2) +#define TEST_CNT_SOCK_BAD BIT(3) +#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4) +#define TEST_CNT_SOCK_AO_REQUIRED BIT(5) +#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6) +#define TEST_CNT_NS_GOOD BIT(7) +#define TEST_CNT_NS_BAD BIT(8) +#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) +#define TEST_CNT_NS_AO_REQUIRED BIT(10) +#define TEST_CNT_NS_DROPPED_ICMP BIT(11) +typedef uint16_t test_cnt; + +#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) +#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) +#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ + TEST_CNT_NS_KEY_NOT_FOUND) +#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \ + TEST_CNT_NS_AO_REQUIRED) +#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \ + TEST_CNT_NS_DROPPED_ICMP) +#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) +#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) + +extern int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected); +extern int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid); +extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +/* + * Frees buffers allocated in test_get_tcp_ao_counters(). + * The function doesn't expect new keys or keys removed between calls + * to test_get_tcp_ao_counters(). Check key counters manually if they + * may change. + */ +static inline int test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ + int ret; + + ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + if (ret) + goto out; + ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, + expected, -1, -1); +out: + test_tcp_ao_counters_free(before); + test_tcp_ao_counters_free(after); + return ret; +} + +struct netstat; +extern struct netstat *netstat_read(void); +extern void netstat_free(struct netstat *ns); +extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb); +extern uint64_t netstat_get(struct netstat *ns, + const char *name, bool *not_found); + +static inline uint64_t netstat_get_one(const char *name, bool *not_found) +{ + struct netstat *ns = netstat_read(); + uint64_t ret; + + ret = netstat_get(ns, name, not_found); + + netstat_free(ns); + return ret; +} + +struct tcp_sock_queue { + uint32_t seq; + void *buf; +}; + +struct tcp_sock_state { + struct tcp_info info; + struct tcp_repair_window trw; + struct tcp_sock_queue out; + int outq_len; /* output queue size (not sent + not acked) */ + int outq_nsd_len; /* output queue size (not sent only) */ + struct tcp_sock_queue in; + int inq_len; + int mss; + int timestamp; +}; + +extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size); +static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr) +{ + __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr)); +} +extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state); +extern void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size); +static inline void test_sock_restore(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr, + const union tcp_addr daddr, + unsigned int dport) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport)); + __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr)); +} +extern void test_ao_restore(int sk, struct tcp_ao_repair *state); +extern void test_sock_state_free(struct tcp_sock_state *state); +extern void test_enable_repair(int sk); +extern void test_disable_repair(int sk); +extern void test_kill_sk(int sk); +static inline int test_add_repaired_key(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + 0, sndid, rcvid); + if (err) + return err; + + tmp.set_current = 1; + tmp.set_rnext = 1; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +#endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c new file mode 100644 index 000000000000..f279ffc3843b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check what features does the kernel support (where the selftest is running). + * Somewhat inspired by CRIU kerndat/kdat kernel features detector. + */ +#include +#include "aolib.h" + +struct kconfig_t { + int _errno; /* the returned error if not supported */ + int (*check_kconfig)(int *error); +}; + +static int has_net_ns(int *err) +{ + if (access("/proc/self/ns/net", F_OK) < 0) { + *err = errno; + if (errno == ENOENT) + return 0; + test_print("Unable to access /proc/self/ns/net: %m"); + return -errno; + } + return *err = errno = 0; +} + +static int has_veth(int *err) +{ + int orig_netns, ns_a, ns_b; + + orig_netns = open_netns(); + ns_a = unshare_open_netns(); + ns_b = unshare_open_netns(); + + *err = add_veth("check_veth", ns_a, ns_b); + + switch_ns(orig_netns); + close(orig_netns); + close(ns_a); + close(ns_b); + return 0; +} + +static int has_tcp_ao(int *err) +{ + struct sockaddr_in addr = { + .sin_family = test_family, + }; + struct tcp_ao_add tmp = {}; + const char *password = DEFAULT_TEST_PASSWORD; + int sk, ret = 0; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + tmp.sndid = 100; + tmp.rcvid = 100; + tmp.keylen = strlen(password); + memcpy(tmp.key, password, strlen(password)); + strcpy(tmp.alg_name, "hmac(sha1)"); + memcpy(&tmp.addr, &addr, sizeof(addr)); + *err = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { + *err = errno; + if (errno != ENOPROTOOPT) + ret = -errno; + } + close(sk); + return ret; +} + +static int has_tcp_md5(int *err) +{ + union tcp_addr addr_any = {}; + int sk, ret = 0; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + /* + * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with + * anything more descriptive. Oh well. + */ + *err = 0; + if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { + *err = errno; + if (errno != ENOPROTOOPT && errno == ENOMEM) { + test_print("setsockopt(TCP_MD5SIG_EXT): %m"); + ret = -errno; + } + } + close(sk); + return ret; +} + +static int has_vrfs(int *err) +{ + int orig_netns, ns_test, ret = 0; + + orig_netns = open_netns(); + ns_test = unshare_open_netns(); + + *err = add_vrf("ksft-check", 55, 101, ns_test); + if (*err && *err != -EOPNOTSUPP) { + test_print("Failed to add a VRF: %d", *err); + ret = *err; + } + + switch_ns(orig_netns); + close(orig_netns); + close(ns_test); + return ret; +} + +static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; +static struct kconfig_t kconfig[__KCONFIG_LAST__] = { + { -1, has_net_ns }, + { -1, has_veth }, + { -1, has_tcp_ao }, + { -1, has_tcp_md5 }, + { -1, has_vrfs }, +}; + +const char *tests_skip_reason[__KCONFIG_LAST__] = { + "Tests require network namespaces support (CONFIG_NET_NS)", + "Tests require veth support (CONFIG_VETH)", + "Tests require TCP-AO support (CONFIG_TCP_AO)", + "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", + "VRFs are not supported (CONFIG_NET_VRF)", +}; + +bool kernel_config_has(enum test_needs_kconfig k) +{ + bool ret; + + pthread_mutex_lock(&kconfig_lock); + if (kconfig[k]._errno == -1) { + if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + test_error("Failed to initialize kconfig %u", k); + } + ret = kconfig[k]._errno == 0; + pthread_mutex_unlock(&kconfig_lock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c new file mode 100644 index 000000000000..b731f2c84083 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original from tools/testing/selftests/net/ipsec.c */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aolib.h" + +#define MAX_PAYLOAD 2048 + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock < 0) { + test_print("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static int netlink_check_answer(int sock, bool quite) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + test_print("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + test_print("expected NLMSG_ERROR, got %d", + (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + if (!quite) { + test_print("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + } + return answer.error; + } + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + test_print("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int __add_veth(int sock, uint32_t seq, const char *name, + int ns_a, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int add_veth(const char *name, int nsfda, int nsfdb) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb); + close(route_sock); + return ret; +} + +static int __ip_addr_add(int sock, uint32_t seq, const char *intf, + int family, union tcp_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = family; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + req.info.ifa_flags = IFA_F_NODAD; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len)) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_addr_add(route_sock, route_seq++, intf, + family, addr, prefix); + + close(route_sock); + return ret; +} + +static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = family; + req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + + return netlink_check_answer(sock, true); +} + +int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_route_add(route_sock, route_seq++, intf, + family, src, dst, vrf); + if (ret == -EEXIST) /* ignoring */ + ret = 0; + + close(route_sock); + return ret; +} + +int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst) +{ + return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN); +} + +static int __link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int link_set_up(const char *intf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __link_set_up(route_sock, route_seq++, intf); + + close(route_sock); + return ret; +} + +static int __add_vrf(int sock, uint32_t seq, const char *name, + uint32_t tabid, int ifindex, int nsfd) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char vrf_type[] = "vrf"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = ifindex; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (nsfd >= 0) + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, + &nsfd, sizeof(nsfd))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE, + &tabid, sizeof(tabid))) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd); + close(route_sock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c new file mode 100644 index 000000000000..2322f4d4676d --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "../../../../../include/linux/compiler.h" +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +struct netstat_counter { + uint64_t val; + char *name; +}; + +struct netstat { + char *header_name; + struct netstat *next; + size_t counters_nr; + struct netstat_counter *counters; +}; + +static struct netstat *lookup_type(struct netstat *ns, + const char *type, size_t len) +{ + while (ns != NULL) { + size_t cmp = max(len, strlen(ns->header_name)); + + if (!strncmp(ns->header_name, type, cmp)) + return ns; + ns = ns->next; + } + return NULL; +} + +static struct netstat *lookup_get(struct netstat *ns, + const char *type, const size_t len) +{ + struct netstat *ret; + + ret = lookup_type(ns, type, len); + if (ret != NULL) + return ret; + + ret = malloc(sizeof(struct netstat)); + if (!ret) + test_error("malloc()"); + + ret->header_name = strndup(type, len); + if (ret->header_name == NULL) + test_error("strndup()"); + ret->next = ns; + ret->counters_nr = 0; + ret->counters = NULL; + + return ret; +} + +static struct netstat *lookup_get_column(struct netstat *ns, const char *line) +{ + char *column; + + column = strchr(line, ':'); + if (!column) + test_error("can't parse netstat file"); + + return lookup_get(ns, line, column - line); +} + +static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) +{ + struct netstat *type = lookup_get_column(*dest, line); + const char *pos = line; + size_t i, nr_elems = 0; + char tmp; + + while ((pos = strchr(pos, ' '))) { + nr_elems++; + pos++; + } + + *dest = type; + type->counters = reallocarray(type->counters, + type->counters_nr + nr_elems, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + + pos = strchr(line, ' ') + 1; + + if (fscanf(fnetstat, type->header_name) == EOF) + test_error("fscanf(%s)", type->header_name); + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') + test_error("Unexpected netstat format (%c)", tmp); + + for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) { + struct netstat_counter *nc = &type->counters[i]; + const char *new_pos = strchr(pos, ' '); + const char *fmt = " %" PRIu64; + + if (new_pos == NULL) + new_pos = strchr(pos, '\n'); + + nc->name = strndup(pos, new_pos - pos); + if (nc->name == NULL) + test_error("strndup()"); + + if (unlikely(!strcmp(nc->name, "MaxConn"))) + fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */ + if (fscanf(fnetstat, fmt, &nc->val) != 1) + test_error("fscanf(%s)", nc->name); + pos = new_pos + 1; + } + type->counters_nr += nr_elems; + + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n') + test_error("Unexpected netstat format"); +} + +static const char *snmp6_name = "Snmp6"; +static void snmp6_read(FILE *fnetstat, struct netstat **dest) +{ + struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name)); + char *counter_name; + size_t i; + + for (i = type->counters_nr;; i++) { + struct netstat_counter *nc; + uint64_t counter; + + if (fscanf(fnetstat, "%ms", &counter_name) == EOF) + break; + if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF) + test_error("Unexpected snmp6 format"); + type->counters = reallocarray(type->counters, i + 1, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + nc = &type->counters[i]; + nc->name = counter_name; + nc->val = counter; + } + type->counters_nr = i; + *dest = type; +} + +struct netstat *netstat_read(void) +{ + struct netstat *ret = 0; + size_t line_sz = 0; + char *line = NULL; + FILE *fnetstat; + + /* + * Opening thread-self instead of /proc/net/... as the latter + * points to /proc/self/net/ which instantiates thread-leader's + * net-ns, see: + * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..") + */ + errno = 0; + fnetstat = fopen("/proc/thread-self/net/netstat", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/netstat"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp6", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp6"); + + snmp6_read(fnetstat, &ret); + fclose(fnetstat); + + free(line); + return ret; +} + +void netstat_free(struct netstat *ns) +{ + while (ns != NULL) { + struct netstat *prev = ns; + size_t i; + + free(ns->header_name); + for (i = 0; i < ns->counters_nr; i++) + free(ns->counters[i].name); + free(ns->counters); + ns = ns->next; + free(prev); + } +} + +static inline void +__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i) +{ + if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) { + test_print("%8s %25s: %" PRId64 " => %" PRId64, + nsb->header_name, nsb->counters[i].name, + a, nsb->counters[i].val); + return; + } + + test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name, + nsb->counters[i].name, a, nsb->counters[i].val); +} + +void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) +{ + size_t i, j; + + while (nsb != NULL) { + if (unlikely(strcmp(nsb->header_name, nsa->header_name))) { + for (i = 0; i < nsb->counters_nr; i++) + __netstat_print_diff(0, nsb, i); + nsb = nsb->next; + continue; + } + + if (nsb->counters_nr < nsa->counters_nr) + test_error("Unexpected: some counters dissapeared!"); + + for (j = 0, i = 0; i < nsb->counters_nr; i++) { + if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { + __netstat_print_diff(0, nsb, i); + continue; + } + + if (nsa->counters[j].val == nsb->counters[i].val) { + j++; + continue; + } + + __netstat_print_diff(nsa->counters[j].val, nsb, i); + j++; + } + if (j != nsa->counters_nr) + test_error("Unexpected: some counters dissapeared!"); + + nsb = nsb->next; + nsa = nsa->next; + } +} + +uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found) +{ + if (not_found) + *not_found = false; + + while (ns != NULL) { + size_t i; + + for (i = 0; i < ns->counters_nr; i++) { + if (!strcmp(name, ns->counters[i].name)) + return ns->counters[i].val; + } + + ns = ns->next; + } + + if (not_found) + *not_found = true; + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c new file mode 100644 index 000000000000..9893b3ba69f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include +#include +#include +#include "aolib.h" + +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + TCP_NEW_SYN_RECV, + + TCP_MAX_STATES /* Leave at the end! */ +}; + +static void test_sock_checkpoint_queue(int sk, int queue, int qlen, + struct tcp_sock_queue *q) +{ + socklen_t len; + int ret; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + len = sizeof(q->seq); + ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len); + if (ret || len != sizeof(q->seq)) + test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len); + + if (!qlen) { + q->buf = NULL; + return; + } + + q->buf = malloc(qlen); + if (q->buf == NULL) + test_error("malloc()"); + ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT); + if (ret != qlen) + test_error("recv(%d): %d", qlen, ret); +} + +void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size) +{ + socklen_t len = sizeof(state->info); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len); + if (ret || len != sizeof(state->info)) + test_error("getsockopt(TCP_INFO): %d", (int)len); + + len = addr_size; + if (getsockname(sk, addr, &len) || len != addr_size) + test_error("getsockname(): %d", (int)len); + + len = sizeof(state->trw); + ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len); + if (ret || len != sizeof(state->trw)) + test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len); + + if (ioctl(sk, SIOCOUTQ, &state->outq_len)) + test_error("ioctl(SIOCOUTQ)"); + + if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len)) + test_error("ioctl(SIOCOUTQNSD)"); + test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out); + + if (ioctl(sk, SIOCINQ, &state->inq_len)) + test_error("ioctl(SIOCINQ)"); + test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in); + + if (state->info.tcpi_state == TCP_CLOSE) + state->outq_len = state->outq_nsd_len = 0; + + len = sizeof(state->mss); + ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len); + if (ret || len != sizeof(state->mss)) + test_error("getsockopt(TCP_MAXSEG): %d", (int)len); + + len = sizeof(state->timestamp); + ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len); + if (ret || len != sizeof(state->timestamp)) + test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len); +} + +void test_ao_checkpoint(int sk, struct tcp_ao_repair *state) +{ + socklen_t len = sizeof(*state); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len); + if (ret || len != sizeof(*state)) + test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len); +} + +static void test_sock_restore_seq(int sk, int queue, uint32_t seq) +{ + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq))) + test_error("setsockopt(TCP_QUEUE_SEQ)"); +} + +static void test_sock_restore_queue(int sk, int queue, void *buf, int len) +{ + int chunk = len; + size_t off = 0; + + if (len == 0) + return; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + do { + int ret; + + ret = send(sk, buf + off, chunk, 0); + if (ret <= 0) { + if (chunk > 1024) { + chunk >>= 1; + continue; + } + test_error("send()"); + } + off += ret; + len -= ret; + } while (len > 0); +} + +void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size) +{ + struct tcp_repair_opt opts[4]; + unsigned int opt_nr = 0; + long flags; + + if (bind(sk, saddr, addr_size)) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len); + test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len); + + if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, + device, strlen(device) + 1)) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + + if (connect(sk, daddr, addr_size)) + test_error("connect()"); + + if (state->info.tcpi_options & TCPI_OPT_SACK) { + opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_WSCALE) { + opts[opt_nr].opt_code = TCPOPT_WINDOW; + opts[opt_nr].opt_val = state->info.tcpi_snd_wscale + + (state->info.tcpi_rcv_wscale << 16); + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + opts[opt_nr].opt_code = TCPOPT_TIMESTAMP; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + opts[opt_nr].opt_code = TCPOPT_MAXSEG; + opts[opt_nr].opt_val = state->mss; + opt_nr++; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_REPAIR_OPTIONS)"); + + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP, + &state->timestamp, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_TIMESTAMP)"); + } + test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len); + test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len); + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw))) + test_error("setsockopt(TCP_REPAIR_WINDOW)"); +} + +void test_ao_restore(int sk, struct tcp_ao_repair *state) +{ + if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state))) + test_error("setsockopt(TCP_AO_REPAIR)"); +} + +void test_sock_state_free(struct tcp_sock_state *state) +{ + free(state->out.buf); + free(state->in.buf); +} + +void test_enable_repair(int sk) +{ + int val = TCP_REPAIR_ON; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_disable_repair(int sk) +{ + int val = TCP_REPAIR_OFF_NO_WP; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_kill_sk(int sk) +{ + test_enable_repair(sk); + close(sk); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c new file mode 100644 index 000000000000..374b27c26ebd --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "aolib.h" + +/* + * Can't be included in the header: it defines static variables which + * will be unique to every object. Let's include it only once here. + */ +#include "../../../kselftest.h" + +/* Prevent overriding of one thread's output by another */ +static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; + +void __test_msg(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_print_msg(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_ok(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_pass(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_fail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_fail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_xfail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_xfail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_error(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_error(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_skip(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_skip(buf); + pthread_mutex_unlock(&ksft_print_lock); +} + +static volatile int failed; +static volatile int skipped; + +void test_failed(void) +{ + failed = 1; +} + +static void test_exit(void) +{ + if (failed) { + ksft_exit_fail(); + } else if (skipped) { + /* ksft_exit_skip() is different from ksft_exit_*() */ + ksft_print_cnts(); + exit(KSFT_SKIP); + } else { + ksft_exit_pass(); + } +} + +struct dlist_t { + void (*destruct)(void); + struct dlist_t *next; +}; +static struct dlist_t *destructors_list; + +void test_add_destructor(void (*d)(void)) +{ + struct dlist_t *p; + + p = malloc(sizeof(struct dlist_t)); + if (p == NULL) + test_error("malloc() failed"); + + p->next = destructors_list; + p->destruct = d; + destructors_list = p; +} + +static void test_destructor(void) __attribute__((destructor)); +static void test_destructor(void) +{ + while (destructors_list) { + struct dlist_t *p = destructors_list->next; + + destructors_list->destruct(); + free(destructors_list); + destructors_list = p; + } + test_exit(); +} + +static void sig_int(int signo) +{ + test_error("Caught SIGINT - exiting"); +} + +int open_netns(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + fd = open(netns_path, O_RDONLY); + if (fd < 0) + test_error("open(%s)", netns_path); + return fd; +} + +int unshare_open_netns(void) +{ + if (unshare(CLONE_NEWNET) != 0) + test_error("unshare()"); + + return open_netns(); +} + +void switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); +} + +int switch_save_ns(int new_ns) +{ + int ret = open_netns(); + + switch_ns(new_ns); + return ret; +} + +static int nsfd_outside = -1; +static int nsfd_parent = -1; +static int nsfd_child = -1; +const char veth_name[] = "ktst-veth"; + +static void init_namespaces(void) +{ + nsfd_outside = open_netns(); + nsfd_parent = unshare_open_netns(); + nsfd_child = unshare_open_netns(); +} + +static void link_init(const char *veth, int family, uint8_t prefix, + union tcp_addr addr, union tcp_addr dest) +{ + if (link_set_up(veth)) + test_error("Failed to set link up"); + if (ip_addr_add(veth, family, addr, prefix)) + test_error("Failed to add ip address"); + if (ip_route_add(veth, family, addr, dest)) + test_error("Failed to add route"); +} + +static unsigned int nr_threads = 1; + +static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER; +static volatile unsigned int stage_threads[2]; +static volatile unsigned int stage_nr; + +/* synchronize all threads in the same stage */ +void synchronize_threads(void) +{ + unsigned int q = stage_nr; + + pthread_mutex_lock(&sync_lock); + stage_threads[q]++; + if (stage_threads[q] == nr_threads) { + stage_nr ^= 1; + stage_threads[stage_nr] = 0; + pthread_cond_signal(&sync_cond); + } + while (stage_threads[q] < nr_threads) + pthread_cond_wait(&sync_cond, &sync_lock); + pthread_mutex_unlock(&sync_lock); +} + +__thread union tcp_addr this_ip_addr; +__thread union tcp_addr this_ip_dest; +int test_family; + +struct new_pthread_arg { + thread_fn func; + union tcp_addr my_ip; + union tcp_addr dest_ip; +}; +static void *new_pthread_entry(void *arg) +{ + struct new_pthread_arg *p = arg; + + this_ip_addr = p->my_ip; + this_ip_dest = p->dest_ip; + p->func(NULL); /* shouldn't return */ + exit(KSFT_FAIL); +} + +static void __test_skip_all(const char *msg) +{ + ksft_set_plan(1); + ksft_print_header(); + skipped = 1; + test_skip("%s", msg); + exit(KSFT_SKIP); +} + +void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2) +{ + struct sigaction sa = { + .sa_handler = sig_int, + .sa_flags = SA_RESTART, + }; + time_t seed = time(NULL); + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGINT, &sa, NULL)) + test_error("Can't set SIGINT handler"); + + test_family = family; + if (!kernel_config_has(KCONFIG_NET_NS)) + __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]); + if (!kernel_config_has(KCONFIG_VETH)) + __test_skip_all(tests_skip_reason[KCONFIG_VETH]); + if (!kernel_config_has(KCONFIG_TCP_AO)) + __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]); + + ksft_set_plan(ntests); + test_print("rand seed %u", (unsigned int)seed); + srand(seed); + + + ksft_print_header(); + init_namespaces(); + + if (add_veth(veth_name, nsfd_parent, nsfd_child)) + test_error("Failed to add veth"); + + switch_ns(nsfd_child); + link_init(veth_name, family, prefix, addr2, addr1); + if (peer2) { + struct new_pthread_arg targ; + pthread_t t; + + targ.my_ip = addr2; + targ.dest_ip = addr1; + targ.func = peer2; + nr_threads++; + if (pthread_create(&t, NULL, new_pthread_entry, &targ)) + test_error("Failed to create pthread"); + } + switch_ns(nsfd_parent); + link_init(veth_name, family, prefix, addr1, addr2); + + this_ip_addr = addr1; + this_ip_dest = addr2; + peer1(NULL); + if (failed) + exit(KSFT_FAIL); + else + exit(KSFT_PASS); +} + +/* /proc/sys/net/core/optmem_max artifically limits the amount of memory + * that can be allocated with sock_kmalloc() on each socket in the system. + * It is not virtualized, so it has to written outside test namespaces. + * To be nice a test will revert optmem back to the old value. + * Keeping it simple without any file lock, which means the tests that + * need to set/increase optmem value shouldn't run in parallel. + * Also, not re-entrant. + */ +static const char *optmem_file = "/proc/sys/net/core/optmem_max"; +static size_t saved_optmem; + +size_t test_get_optmem(void) +{ + FILE *foptmem; + int old_ns; + size_t ret; + + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "r"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fscanf(foptmem, "%zu", &ret) != 1) + test_error("can't read from %s", optmem_file); + fclose(foptmem); + switch_ns(old_ns); + return ret; +} + +static void __test_set_optmem(size_t new, size_t *old) +{ + FILE *foptmem; + int old_ns; + + if (old != NULL) + *old = test_get_optmem(); + + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "w"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fprintf(foptmem, "%zu", new) <= 0) + test_error("can't write %zu to %s", new, optmem_file); + fclose(foptmem); + switch_ns(old_ns); +} + +static void test_revert_optmem(void) +{ + if (saved_optmem == 0) + return; + + __test_set_optmem(saved_optmem, NULL); +} + +void test_set_optmem(size_t value) +{ + if (saved_optmem == 0) { + __test_set_optmem(value, &saved_optmem); + test_add_destructor(test_revert_optmem); + } else { + __test_set_optmem(value, NULL); + } +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c new file mode 100644 index 000000000000..7f3c31b7d997 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "../../../../../include/linux/kernel.h" +#include "../../../../../include/linux/stringify.h" +#include "aolib.h" + +const unsigned int test_server_port = 7010; +int __test_listen_socket(int backlog, void *addr, size_t addr_sz) +{ + int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + long flags; + + if (sk < 0) + test_error("socket()"); + + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name, + strlen(veth_name) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE)"); + + if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (listen(sk, backlog)) + test_error("listen()"); + + return sk; +} + +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set fds, efds; + int ret; + socklen_t slen = sizeof(ret); + + FD_ZERO(&fds); + FD_SET(sk, &fds); + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + if (write) + ret = select(sk + 1, NULL, &fds, &efds, ptv); + else + ret = select(sk + 1, &fds, NULL, &efds, ptv); + if (ret < 0) + return -errno; + if (ret == 0) { + errno = ETIMEDOUT; + return -ETIMEDOUT; + } + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + return -ret; + return 0; +} + +int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout) +{ + long flags; + int err; + + if (device != NULL) { + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device, + strlen(device) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + } + + if (!timeout) { + err = connect(sk, addr, addr_sz); + if (err) { + err = -errno; + goto out; + } + return 0; + } + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (connect(sk, addr, addr_sz) < 0) { + if (errno != EINPROGRESS) { + err = -errno; + goto out; + } + if (timeout < 0) + return sk; + err = test_wait_fd(sk, timeout, 1); + if (err) + goto out; + } + return sk; + +out: + close(sk); + return err; +} + +int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, + int vrf, const char *password) +{ + size_t pwd_len = strlen(password); + struct tcp_md5sig md5sig = {}; + + md5sig.tcpm_keylen = pwd_len; + memcpy(md5sig.tcpm_key, password, pwd_len); + md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX; + md5sig.tcpm_prefixlen = prefix; + if (vrf >= 0) { + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; + md5sig.tcpm_ifindex = (uint8_t)vrf; + } + memcpy(&md5sig.tcpm_addr, addr, addr_sz); + + errno = 0; + return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT, + &md5sig, sizeof(md5sig)); +} + + +int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid, + uint8_t maclen, uint8_t keyflags, + uint8_t keylen, const char *key) +{ + memset(ao, 0, sizeof(struct tcp_ao_add)); + + ao->set_current = !!set_current; + ao->set_rnext = !!set_rnext; + ao->prefix = prefix; + ao->sndid = sndid; + ao->rcvid = rcvid; + ao->maclen = maclen; + ao->keyflags = keyflags; + ao->keylen = keylen; + ao->ifindex = vrf; + + memcpy(&ao->addr, addr, addr_sz); + + if (strlen(alg) > 64) + return -ENOBUFS; + strncpy(ao->alg_name, alg, 64); + + memcpy(ao->key, key, + (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen); + return 0; +} + +static int test_get_ao_keys_nr(int sk) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + tmp.nkeys = 1; + tmp.get_all = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return -errno; + return (int)tmp.nkeys; +} + +int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + memcpy(&tmp.addr, addr, addr_sz); + tmp.prefix = prefix; + tmp.sndid = sndid; + tmp.rcvid = rcvid; + tmp.nkeys = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return ret; + if (tmp.nkeys != 1) + return -E2BIG; + *out = tmp; + return 0; +} + +int test_get_ao_info(int sk, struct tcp_ao_info_opt *out) +{ + socklen_t sz = sizeof(*out); + + out->reserved = 0; + out->reserved2 = 0; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz)) + return -errno; + if (sz != sizeof(*out)) + return -EMSGSIZE; + return 0; +} + +int test_set_ao_info(int sk, struct tcp_ao_info_opt *in) +{ + socklen_t sz = sizeof(*in); + + in->reserved = 0; + in->reserved2 = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz)) + return -errno; + return 0; +} + +int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b) +{ + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + + if (!strcmp("cmac(aes128)", a->alg_name)) { + is_kdf_aes_128_cmac = (a->keylen != 16); + is_cmac_aes = true; + } + +#define __cmp_ao(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %u != %u", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + __cmp_ao(sndid); + __cmp_ao(rcvid); + __cmp_ao(prefix); + __cmp_ao(keyflags); + __cmp_ao(ifindex); + if (a->maclen) { + __cmp_ao(maclen); + } else if (b->maclen != 12) { + test_fail("getsockopt(): expected default maclen 12, but it's %u", + b->maclen); + return -1; + } + if (!is_kdf_aes_128_cmac) { + __cmp_ao(keylen); + } else if (b->keylen != 16) { + test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u", + b->keylen); + return -1; + } +#undef __cmp_ao + if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) { + test_fail("getsockopt(): returned key is different `%s' != `%s'", + b->key, a->key); + return -1; + } + if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) { + test_fail("getsockopt(): returned address is different"); + return -1; + } + if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) { + test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name); + return -1; + } + if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) { + test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name); + return -1; + } + /* For a established key rotation test don't add a key with + * set_current = 1, as it's likely to change by peer's request; + * rather use setsockopt(TCP_AO_INFO) + */ + if (a->set_current != b->is_current) { + test_fail("getsockopt(): returned key is not Current_key"); + return -1; + } + if (a->set_rnext != b->is_rnext) { + test_fail("getsockopt(): returned key is not RNext_key"); + return -1; + } + + return 0; +} + +int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b) +{ + /* No check for ::current_key, as it may change by the peer */ + if (a->ao_required != b->ao_required) { + test_fail("getsockopt(): returned ao doesn't have ao_required"); + return -1; + } + if (a->accept_icmps != b->accept_icmps) { + test_fail("getsockopt(): returned ao doesn't accept ICMPs"); + return -1; + } + if (a->set_rnext && a->rnext != b->rnext) { + test_fail("getsockopt(): RNext KeyID has changed"); + return -1; + } +#define __cmp_cnt(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + if (a->set_counters) { + __cmp_cnt(pkt_good); + __cmp_cnt(pkt_bad); + __cmp_cnt(pkt_key_not_found); + __cmp_cnt(pkt_ao_required); + __cmp_cnt(pkt_dropped_icmp); + } +#undef __cmp_cnt + return 0; +} + +int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +{ + struct tcp_ao_getsockopt *key_dump; + socklen_t key_dump_sz = sizeof(*key_dump); + struct tcp_ao_info_opt info = {}; + bool c1, c2, c3, c4, c5; + struct netstat *ns; + int err, nr_keys; + + memset(out, 0, sizeof(*out)); + + /* per-netns */ + ns = netstat_read(); + out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + netstat_free(ns); + if (c1 || c2 || c3 || c4 || c5) + return -EOPNOTSUPP; + + err = test_get_ao_info(sk, &info); + if (err) + return err; + + /* per-socket */ + out->ao_info_pkt_good = info.pkt_good; + out->ao_info_pkt_bad = info.pkt_bad; + out->ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + + /* per-key */ + nr_keys = test_get_ao_keys_nr(sk); + if (nr_keys < 0) + return nr_keys; + if (nr_keys == 0) + test_error("test_get_ao_keys_nr() == 0"); + out->nr_keys = (size_t)nr_keys; + key_dump = calloc(nr_keys, key_dump_sz); + if (!key_dump) + return -errno; + + key_dump[0].nkeys = nr_keys; + key_dump[0].get_all = 1; + key_dump[0].get_all = 1; + err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, + key_dump, &key_dump_sz); + if (err) { + free(key_dump); + return -errno; + } + + out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); + if (!out->key_cnts) { + free(key_dump); + return -errno; + } + + while (nr_keys--) { + out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + } + free(key_dump); + + return 0; +} + +int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, expecting_inc) \ +do { \ + if (before->cnt > after->cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ + tst_name ?: "", before->cnt, after->cnt); \ + return -1; \ + } \ + if ((before->cnt != after->cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ + tst_name ?: "", (expecting_inc) ? "" : "not ", \ + before->cnt, after->cnt); \ + return -1; \ + } \ +} while(0) + + errno = 0; + /* per-netns */ + __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); + __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); + __cmp_ao(netns_ao_key_not_found, + !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); + __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); + __cmp_ao(netns_ao_dropped_icmp, + !!(expected & TEST_CNT_NS_DROPPED_ICMP)); + /* per-socket */ + __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); + __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); + __cmp_ao(ao_info_pkt_key_not_found, + !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); + __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); + __cmp_ao(ao_info_pkt_dropped_icmp, + !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + return 0; +#undef __cmp_ao +} + +int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, + int sndid, int rcvid) +{ + size_t i; +#define __cmp_ao(i, cnt, expecting_inc) \ +do { \ + if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ + tst_name ?: "", before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ + tst_name ?: "", (expecting_inc) ? "" : "not ",\ + before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ +} while(0) + + if (before->nr_keys != after->nr_keys) { + test_fail("%s: Keys changed on the socket %zu != %zu", + tst_name, before->nr_keys, after->nr_keys); + return -1; + } + + /* per-key */ + i = before->nr_keys; + while (i--) { + if (sndid >= 0 && before->key_cnts[i].sndid != sndid) + continue; + if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) + continue; + __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); + __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + } + return 0; +#undef __cmp_ao +} + +void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +{ + free(cnts->key_cnts); +} + +#define TEST_BUF_SIZE 4096 +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + ssize_t total = 0; + + do { + char buf[TEST_BUF_SIZE]; + ssize_t bytes, sent; + int ret; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + bytes = recv(sk, buf, sizeof(buf), 0); + + if (bytes < 0) + test_error("recv(): %zd", bytes); + if (bytes == 0) + break; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + total += bytes; + } while (!quota || total < quota); + + return total; +} + +ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec) +{ + char msg[msg_len]; + int nodelay = 1; + size_t i; + + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) + test_error("setsockopt(TCP_NODELAY)"); + + for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) { + size_t sent, bytes = min(msg_len, buf_sz - i); + int ret; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf + i, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + + bytes = 0; + do { + ssize_t got; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); + if (got <= 0) + test_error("recv(): %zd", got); + bytes += got; + } while (bytes < sent); + if (bytes > sent) + test_error("recv(): %zd > %zd", bytes, sent); + if (memcmp(buf + i, msg, bytes) != 0) { + test_fail("received message differs"); + return -1; + } + } + return i; +} + +int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec) +{ + size_t buf_sz = msg_len * nr; + char *buf = alloca(buf_sz); + + randomize_buffer(buf, buf_sz); + if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) + return -1; + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c new file mode 100644 index 000000000000..372daca525f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aolib.h" +#include + +void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } +} + +const struct sockaddr_in6 addr_any6 = { + .sin6_family = AF_INET6, +}; + +const struct sockaddr_in addr_any4 = { + .sin_family = AF_INET, +}; -- cgit v1.2.3 From a8fcf8ca14d7b374519e5637d7b49b03ebaf580d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:16 +0000 Subject: selftests/net: Verify that TCP-AO complies with ignoring ICMPs Hand-crafted ICMP packets are sent to the server, the server checks for hard/soft errors and fails if any. Expected output for ipv4 version: > # ./icmps-discard_ipv4 > 1..3 > # 3164[lib/setup.c:166] rand seed 1642623745 > TAP version 13 > # 3164[lib/proc.c:207] Snmp6 Ip6InReceives: 0 => 1 > # 3164[lib/proc.c:207] Snmp6 Ip6InNoRoutes: 0 => 1 > # 3164[lib/proc.c:207] Snmp6 Ip6InOctets: 0 => 76 > # 3164[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 0 => 1 > # 3164[lib/proc.c:207] Tcp InSegs: 2 => 203 > # 3164[lib/proc.c:207] Tcp OutSegs: 1 => 202 > # 3164[lib/proc.c:207] IcmpMsg InType3: 0 => 543 > # 3164[lib/proc.c:207] Icmp InMsgs: 0 => 543 > # 3164[lib/proc.c:207] Icmp InDestUnreachs: 0 => 543 > # 3164[lib/proc.c:207] Ip InReceives: 2 => 746 > # 3164[lib/proc.c:207] Ip InDelivers: 2 => 746 > # 3164[lib/proc.c:207] Ip OutRequests: 1 => 202 > # 3164[lib/proc.c:207] IpExt InOctets: 132 => 61684 > # 3164[lib/proc.c:207] IpExt OutOctets: 68 => 31324 > # 3164[lib/proc.c:207] IpExt InNoECTPkts: 2 => 744 > # 3164[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3164[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 200 > # 3164[lib/proc.c:207] TcpExt TCPDelivered: 0 => 199 > # 3164[lib/proc.c:207] TcpExt TCPAOGood: 2 => 203 > # 3164[lib/proc.c:207] TcpExt TCPAODroppedIcmps: 0 => 541 > ok 1 InDestUnreachs delivered 543 > ok 2 Server survived 20000 bytes of traffic > ok 3 ICMPs ignored 541 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version: > # ./icmps-discard_ipv6 > 1..3 > # 3186[lib/setup.c:166] rand seed 1642623803 > TAP version 13 > # 3186[lib/proc.c:207] Snmp6 Ip6InReceives: 4 => 568 > # 3186[lib/proc.c:207] Snmp6 Ip6InDelivers: 3 => 564 > # 3186[lib/proc.c:207] Snmp6 Ip6OutRequests: 2 => 204 > # 3186[lib/proc.c:207] Snmp6 Ip6InMcastPkts: 1 => 4 > # 3186[lib/proc.c:207] Snmp6 Ip6OutMcastPkts: 0 => 1 > # 3186[lib/proc.c:207] Snmp6 Ip6InOctets: 320 => 70420 > # 3186[lib/proc.c:207] Snmp6 Ip6OutOctets: 160 => 35512 > # 3186[lib/proc.c:207] Snmp6 Ip6InMcastOctets: 72 => 336 > # 3186[lib/proc.c:207] Snmp6 Ip6OutMcastOctets: 0 => 76 > # 3186[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 4 => 568 > # 3186[lib/proc.c:207] Snmp6 Icmp6InMsgs: 1 => 361 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutMsgs: 1 => 2 > # 3186[lib/proc.c:207] Snmp6 Icmp6InDestUnreachs: 0 => 360 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutMLDv2Reports: 0 => 1 > # 3186[lib/proc.c:207] Snmp6 Icmp6InType1: 0 => 360 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutType143: 0 => 1 > # 3186[lib/proc.c:207] Tcp InSegs: 2 => 203 > # 3186[lib/proc.c:207] Tcp OutSegs: 1 => 202 > # 3186[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3186[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 200 > # 3186[lib/proc.c:207] TcpExt TCPDelivered: 0 => 199 > # 3186[lib/proc.c:207] TcpExt TCPAOGood: 2 => 203 > # 3186[lib/proc.c:207] TcpExt TCPAODroppedIcmps: 0 => 360 > ok 1 Icmp6InDestUnreachs delivered 360 > ok 2 Server survived 20000 bytes of traffic > ok 3 ICMPs ignored 360 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/icmps-discard.c | 438 +++++++++++++++++++++ 2 files changed, 439 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/icmps-discard.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 62425b9fb73c..0fc5db59be0c 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect +TEST_BOTH_AF += icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c new file mode 100644 index 000000000000..d77c791754de --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest that verifies that incomping ICMPs are ignored, + * the TCP connection stays alive, no hard or soft errors get reported + * to the usespace and the counter for ignored ICMPs is updated. + * + * RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. + * + * Author: Dmitry Safonov + */ +#include +#include +#include +#include +#include +#include +#include +#include "aolib.h" +#include "../../../../include/linux/compiler.h" + +const size_t packets_nr = 20; +const size_t packet_size = 100; +const char *tcpao_icmps = "TCPAODroppedIcmps"; + +#ifdef IPV6_TEST +const char *dst_unreach = "Icmp6InDestUnreachs"; +const int sk_ip_level = SOL_IPV6; +const int sk_recverr = IPV6_RECVERR; +#else +const char *dst_unreach = "InDestUnreachs"; +const int sk_ip_level = SOL_IP; +const int sk_recverr = IP_RECVERR; +#endif + +#define test_icmps_fail test_fail +#define test_icmps_ok test_ok + +static void serve_interfered(int sk) +{ + ssize_t test_quota = packet_size * packets_nr * 10; + uint64_t dest_unreach_a, dest_unreach_b; + uint64_t icmp_ignored_a, icmp_ignored_b; + struct tcp_ao_counters ao_cnt1, ao_cnt2; + bool counter_not_found; + struct netstat *ns_after, *ns_before; + ssize_t bytes; + + ns_before = netstat_read(); + dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); + icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); + if (test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + bytes = test_server_run(sk, test_quota, 0); + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); + icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, + &counter_not_found); + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + netstat_free(ns_before); + netstat_free(ns_after); + + if (dest_unreach_a >= dest_unreach_b) { + test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + dst_unreach, dest_unreach_a, dest_unreach_b); + return; + } + test_ok("%s delivered %" PRIu64, + dst_unreach, dest_unreach_b - dest_unreach_a); + if (bytes < 0) + test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes)); + else + test_icmps_ok("Server survived %zd bytes of traffic", test_quota); + if (counter_not_found) { + test_fail("Not found %s counter", tcpao_icmps); + return; + } + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + if (icmp_ignored_a >= icmp_ignored_b) { + test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + tcpao_icmps, icmp_ignored_a, icmp_ignored_b); + return; + } + test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a); +} + +static void *server_fn(void *arg) +{ + int val, err, sk, lsk; + bool accept_icmps = false; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_set_ao_flags(lsk, false, accept_icmps)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + err = test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0); + if (!err) + test_error("timeouted for accept()"); + else if (err < 0) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + /* Fail on hard ip errors, such as dest unreachable (RFC1122) */ + val = 1; + if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val))) + test_error("setsockopt()"); + + synchronize_threads(); + + serve_interfered(sk); + return NULL; +} + +static size_t packets_sent; +static size_t icmps_sent; + +static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum4_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + iph->tot_len = htons(packet_len); + iph->ttl = 2; + iph->protocol = proto; + iph->saddr = src->sin_addr.s_addr; + iph->daddr = dst->sin_addr.s_addr; + iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0); +} + +static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + struct { + struct iphdr iph; + struct icmphdr icmph; + struct iphdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + if (sk < 0) + test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet); + set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst); + + packet.icmph.type = type; + packet.icmph.code = code; + if (code == ICMP_FRAG_NEEDED) { + randomize_buffer(&packet.icmph.un.frag.mtu, + sizeof(packet.icmph.un.frag.mtu)); + } + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin_port; + packet.tcph.dport = src->sin_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + packet.icmph.checksum = checksum4_fold((void *)&packet.icmph, + packet_len, 0); + + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)dst, sizeof(*dst)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + iph->version = 6; + iph->payload_len = htons(packet_len); + iph->nexthdr = proto; + iph->hop_limit = 2; + iph->saddr = src->sin6_addr; + iph->daddr = dst->sin6_addr; +} + +static inline uint16_t csum_fold(uint32_t csum) +{ + uint32_t sum = csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (uint16_t)~sum; +} + +static inline uint32_t csum_add(uint32_t csum, uint32_t addend) +{ + uint32_t res = csum; + + res += addend; + return res + (res < addend); +} + +noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum = csum_add(sum, words[i]); + if (len & 1) + sum = csum_add(sum, ((char *)data)[len - 1]); + return sum; +} + +noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + void *ptr, size_t len, uint8_t proto) +{ + struct { + struct in6_addr saddr; + struct in6_addr daddr; + uint32_t payload_len; + uint8_t zero[3]; + uint8_t nexthdr; + } pseudo_header = {}; + uint32_t sum; + + pseudo_header.saddr = src->sin6_addr; + pseudo_header.daddr = dst->sin6_addr; + pseudo_header.payload_len = htonl(len); + pseudo_header.nexthdr = proto; + + sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0); + sum = checksum6_nofold(ptr, len, sum); + + return csum_fold(sum); +} + +static void icmp6_interfere(int type, int code, uint32_t rcv_nxt, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); + struct sockaddr_in6 dst_raw = *dst; + struct { + struct ipv6hdr iph; + struct icmp6hdr icmph; + struct ipv6hdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + + if (sk < 0) + test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet) - sizeof(packet.iph); + set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst); + + packet.icmph.icmp6_type = type; + packet.icmph.icmp6_code = code; + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin6_port; + packet.tcph.dport = src->sin6_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + + packet.icmph.icmp6_cksum = icmp6_checksum(src, dst, + (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6); + + dst_raw.sin6_port = htons(IPPROTO_RAW); + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)&dst_raw, sizeof(dst_raw)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static uint32_t get_rcv_nxt(int sk) +{ + int val = TCP_REPAIR_ON; + uint32_t ret; + socklen_t sz = sizeof(ret); + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + val = TCP_RECV_QUEUE; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz)) + test_error("getsockopt(TCP_QUEUE_SEQ)"); + val = TCP_REPAIR_OFF_NO_WP; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + return ret; +} + +static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst) +{ + struct sockaddr_in *saddr4 = src; + struct sockaddr_in *daddr4 = dst; + struct sockaddr_in6 *saddr6 = src; + struct sockaddr_in6 *daddr6 = dst; + size_t i; + + if (saddr4->sin_family != daddr4->sin_family) + test_error("Different address families"); + + for (i = 0; i < nr; i++) { + if (saddr4->sin_family == AF_INET) { + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + rcv_nxt, saddr4, daddr4); + icmps_sent += 3; + } else if (saddr4->sin_family == AF_INET6) { + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_ADM_PROHIBITED, + rcv_nxt, saddr6, daddr6); + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + rcv_nxt, saddr6, daddr6); + icmps_sent += 2; + } else { + test_error("Not ip address family"); + } + } +} + +static void send_interfered(int sk) +{ + const unsigned int timeout = TEST_TIMEOUT_SEC; + struct sockaddr_in6 src, dst; + socklen_t addr_sz; + + addr_sz = sizeof(src); + if (getsockname(sk, &src, &addr_sz)) + test_error("getsockname()"); + addr_sz = sizeof(dst); + if (getpeername(sk, &dst, &addr_sz)) + test_error("getpeername()"); + + while (1) { + uint32_t rcv_nxt; + + if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + test_fail("client: connection is broken"); + return; + } + packets_sent += packets_nr; + rcv_nxt = get_rcv_nxt(sk); + icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst); + } +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + send_interfered(sk); + + /* Not expecting client to quit */ + test_fail("client disconnected"); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(3, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From d11301f65977244ca8bdcc6ac80431683b9b3b0a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:17 +0000 Subject: selftests/net: Add TCP-AO ICMPs accept test Reverse to icmps-discard test: the server accepts ICMPs, using TCP_AO_CMDF_ACCEPT_ICMP and it is expected to fail under ICMP flood from client. Test that the default pre-TCP-AO behaviour functions when TCP_AO_CMDF_ACCEPT_ICMP is set. Expected output for ipv4 version (in case it receives ICMP_PROT_UNREACH): > # ./icmps-accept_ipv4 > 1..3 > # 3209[lib/setup.c:166] rand seed 1642623870 > TAP version 13 > # 3209[lib/proc.c:207] Snmp6 Ip6InReceives: 0 => 1 > # 3209[lib/proc.c:207] Snmp6 Ip6InNoRoutes: 0 => 1 > # 3209[lib/proc.c:207] Snmp6 Ip6InOctets: 0 => 76 > # 3209[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 0 => 1 > # 3209[lib/proc.c:207] Tcp InSegs: 3 => 23 > # 3209[lib/proc.c:207] Tcp OutSegs: 2 => 22 > # 3209[lib/proc.c:207] IcmpMsg InType3: 0 => 4 > # 3209[lib/proc.c:207] Icmp InMsgs: 0 => 4 > # 3209[lib/proc.c:207] Icmp InDestUnreachs: 0 => 4 > # 3209[lib/proc.c:207] Ip InReceives: 3 => 27 > # 3209[lib/proc.c:207] Ip InDelivers: 3 => 27 > # 3209[lib/proc.c:207] Ip OutRequests: 2 => 22 > # 3209[lib/proc.c:207] IpExt InOctets: 288 => 3420 > # 3209[lib/proc.c:207] IpExt OutOctets: 124 => 3244 > # 3209[lib/proc.c:207] IpExt InNoECTPkts: 3 => 25 > # 3209[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3209[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 20 > # 3209[lib/proc.c:207] TcpExt TCPDelivered: 0 => 19 > # 3209[lib/proc.c:207] TcpExt TCPAOGood: 3 => 23 > ok 1 InDestUnreachs delivered 4 > ok 2 server failed with -92: Protocol not available > ok 3 TCPAODroppedIcmps counter didn't change: 0 >= 0 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version (in case it receives ADM_PROHIBITED): > # ./icmps-accept_ipv6 > 1..3 > # 3277[lib/setup.c:166] rand seed 1642624035 > TAP version 13 > # 3277[lib/proc.c:207] Snmp6 Ip6InReceives: 6 => 31 > # 3277[lib/proc.c:207] Snmp6 Ip6InDelivers: 4 => 29 > # 3277[lib/proc.c:207] Snmp6 Ip6OutRequests: 4 => 24 > # 3277[lib/proc.c:207] Snmp6 Ip6InOctets: 592 => 4492 > # 3277[lib/proc.c:207] Snmp6 Ip6OutOctets: 332 => 3852 > # 3277[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 6 => 31 > # 3277[lib/proc.c:207] Snmp6 Icmp6InMsgs: 1 => 6 > # 3277[lib/proc.c:207] Snmp6 Icmp6InDestUnreachs: 0 => 5 > # 3277[lib/proc.c:207] Snmp6 Icmp6InType1: 0 => 5 > # 3277[lib/proc.c:207] Tcp InSegs: 3 => 23 > # 3277[lib/proc.c:207] Tcp OutSegs: 2 => 22 > # 3277[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3277[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 20 > # 3277[lib/proc.c:207] TcpExt TCPDelivered: 0 => 19 > # 3277[lib/proc.c:207] TcpExt TCPAOGood: 3 => 23 > ok 1 Icmp6InDestUnreachs delivered 5 > ok 2 server failed with -13: Permission denied > ok 3 TCPAODroppedIcmps counter didn't change: 0 >= 0 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 With some luck the server may fail with ECONNREFUSED (depending on what icmp packet was delivered firstly). For the kernel error handlers see: tab_unreach[] and icmp_err_convert[]. Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 4 +++- tools/testing/selftests/net/tcp_ao/icmps-accept.c | 1 + tools/testing/selftests/net/tcp_ao/icmps-discard.c | 25 ++++++++++++++++------ 3 files changed, 22 insertions(+), 8 deletions(-) create mode 120000 tools/testing/selftests/net/tcp_ao/icmps-accept.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 0fc5db59be0c..7bf61b167ec5 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect -TEST_BOTH_AF += icmps-discard +TEST_BOTH_AF += icmps-accept icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) @@ -44,3 +44,5 @@ $(OUTPUT)/%_ipv4: %.c $(OUTPUT)/%_ipv6: %.c $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ +$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c new file mode 120000 index 000000000000..0a5bb85eb260 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c @@ -0,0 +1 @@ +icmps-discard.c \ No newline at end of file diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d77c791754de..d69bcba3c929 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -39,8 +39,14 @@ const int sk_ip_level = SOL_IP; const int sk_recverr = IP_RECVERR; #endif -#define test_icmps_fail test_fail -#define test_icmps_ok test_ok +/* Server is expected to fail with hard error if ::accept_icmp is set */ +#ifdef TEST_ICMPS_ACCEPT +# define test_icmps_fail test_ok +# define test_icmps_ok test_fail +#else +# define test_icmps_fail test_fail +# define test_icmps_ok test_ok +#endif static void serve_interfered(int sk) { @@ -84,7 +90,11 @@ static void serve_interfered(int sk) test_fail("Not found %s counter", tcpao_icmps); return; } +#ifdef TEST_ICMPS_ACCEPT + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); +#else test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); +#endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, tcpao_icmps, icmp_ignored_a, icmp_ignored_b); @@ -95,11 +105,15 @@ static void serve_interfered(int sk) static void *server_fn(void *arg) { - int val, err, sk, lsk; + int val, sk, lsk; bool accept_icmps = false; lsk = test_listen_socket(this_ip_addr, test_server_port, 1); +#ifdef TEST_ICMPS_ACCEPT + accept_icmps = true; +#endif + if (test_set_ao_flags(lsk, false, accept_icmps)) test_error("setsockopt(TCP_AO_INFO)"); @@ -107,10 +121,7 @@ static void *server_fn(void *arg) test_error("setsockopt(TCP_AO_ADD_KEY)"); synchronize_threads(); - err = test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0); - if (!err) - test_error("timeouted for accept()"); - else if (err < 0) + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); sk = accept(lsk, NULL, NULL); -- cgit v1.2.3 From ed9d09b309b17cead3bbb910894399da6b74e898 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:18 +0000 Subject: selftests/net: Add a test for TCP-AO keys matching Add TCP-AO tests on connect()/accept() pair. SNMP counters exposed by kernel are very useful here to verify the expected behavior of TCP-AO. Expected output for ipv4 version: > # ./connect-deny_ipv4 > 1..19 > # 1702[lib/setup.c:254] rand seed 1680553689 > TAP version 13 > ok 1 Non-AO server + AO client > ok 2 Non-AO server + AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 3 AO server + Non-AO client > ok 4 AO server + Non-AO client: counter TCPAORequired increased 0 => 1 > ok 5 Wrong password > ok 6 Wrong password: counter TCPAOBad increased 0 => 1 > ok 7 Wrong rcv id > ok 8 Wrong rcv id: counter TCPAOKeyNotFound increased 1 => 2 > ok 9 Wrong snd id > ok 10 Wrong snd id: counter TCPAOGood increased 0 => 1 > ok 11 Server: Wrong addr: counter TCPAOKeyNotFound increased 2 => 3 > ok 12 Server: Wrong addr > ok 13 Client: Wrong addr: connect() was prevented > ok 14 rcv id != snd id: connected > ok 15 rcv id != snd id: counter TCPAOGood increased 1 => 3 > ok 16 Server: prefix match: connected > ok 17 Server: prefix match: counter TCPAOGood increased 4 => 6 > ok 18 Client: prefix match: connected > ok 19 Client: prefix match: counter TCPAOGood increased 7 => 9 > # Totals: pass:19 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version: > # ./connect-deny_ipv6 > 1..19 > # 1725[lib/setup.c:254] rand seed 1680553711 > TAP version 13 > ok 1 Non-AO server + AO client > ok 2 Non-AO server + AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 3 AO server + Non-AO client: counter TCPAORequired increased 0 => 1 > ok 4 AO server + Non-AO client > ok 5 Wrong password: counter TCPAOBad increased 0 => 1 > ok 6 Wrong password > ok 7 Wrong rcv id: counter TCPAOKeyNotFound increased 1 => 2 > ok 8 Wrong rcv id > ok 9 Wrong snd id: counter TCPAOGood increased 0 => 1 > ok 10 Wrong snd id > ok 11 Server: Wrong addr > ok 12 Server: Wrong addr: counter TCPAOKeyNotFound increased 2 => 3 > ok 13 Client: Wrong addr: connect() was prevented > ok 14 rcv id != snd id: connected > ok 15 rcv id != snd id: counter TCPAOGood increased 1 => 3 > ok 16 Server: prefix match: connected > ok 17 Server: prefix match: counter TCPAOGood increased 5 => 7 > ok 18 Client: prefix match: connected > ok 19 Client: prefix match: counter TCPAOGood increased 8 => 10 > # Totals: pass:19 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/connect-deny.c | 264 ++++++++++++++++++++++ 2 files changed, 265 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/connect-deny.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 7bf61b167ec5..f3b1d7f42edb 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect +TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c new file mode 100644 index 000000000000..1ca78040d8b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) + +static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, 0, sndid, rcvid, maclen, + 0, strlen(key), key); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void try_accept(const char *tst_name, unsigned int port, const char *pwd, + union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + const char *cnt_name, test_cnt cnt_expected, + fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timeouted for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + close(lsk); + if (pwd) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + + if (!cnt_name) + goto out; + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + +out: + synchronize_threads(); /* close() */ + if (sk > 0) + close(sk); +} + +static void *server_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_accept("Non-AO server + AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, + "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + + try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT); + + try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD", + this_ip_dest, -1, 100, 100, 0, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 101, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 101, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT); + + try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 8, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Client: Wrong addr", port++, NULL, + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + + try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 200, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static void try_connect(const char *tst_name, unsigned int port, + const char *pwd, union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, + test_cnt cnt_expected, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + time_t timeout; + int sk, ret; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) { + test_ok("%s: connect() was prevented", tst_name); + } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { + test_ok("%s", tst_name); + } else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) { + test_ok("%s: refused to connect", tst_name); + } else { + test_error("%s: connect() returned %d", tst_name, ret); + } + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + if (pwd && ret > 0) { + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + } +out: + synchronize_threads(); /* close() */ + + if (ret > 0) + close(sk); +} + +static void *client_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("AO server + Non-AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT); + + try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0); + + try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, TEST_CNT_GOOD, 0); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(21, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From b26660531cf66e1c8daab551535d3ad07b78fa54 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:19 +0000 Subject: selftests/net: Add test for TCP-AO add setsockopt() command Verify corner-cases for UAPI. Sample output: > # ./setsockopt-closed_ipv4 > 1..120 > # 1657[lib/setup.c:254] rand seed 1681938184 > TAP version 13 > ok 1 AO add: minimum size > ok 2 AO add: extended size > ok 3 AO add: null optval > ok 4 AO del: minimum size > ok 5 AO del: extended size > ok 6 AO del: null optval > ok 7 AO set info: minimum size > ok 8 AO set info: extended size > ok 9 AO info get: : extended size > ok 10 AO set info: null optval > ok 11 AO get info: minimum size > ok 12 AO get info: extended size > ok 13 AO get info: null optval > ok 14 AO get info: null optlen > ok 15 AO get keys: minimum size > ok 16 AO get keys: extended size > ok 17 AO get keys: null optval > ok 18 AO get keys: null optlen > ok 19 key add: too big keylen > ok 20 key add: using reserved padding > ok 21 key add: using reserved2 padding > ok 22 key add: wrong address family > ok 23 key add: port (unsupported) > ok 24 key add: no prefix, addr > ok 25 key add: no prefix, any addr > ok 26 key add: prefix, any addr > ok 27 key add: too big prefix > ok 28 key add: too short prefix > ok 29 key add: bad key flags > ok 30 key add: add current key on a listen socket > ok 31 key add: add rnext key on a listen socket > ok 32 key add: add current+rnext key on a listen socket > ok 33 key add: add key and set as current > ok 34 key add: add key and set as rnext > ok 35 key add: add key and set as current+rnext > ok 36 key add: ifindex without TCP_AO_KEYF_IFNINDEX > ok 37 key add: non-existent VRF > ok 38 optmem limit was hit on adding 69 key > ok 39 key add: maclen bigger than TCP hdr > ok 40 key add: bad algo > ok 41 key del: using reserved padding > ok 42 key del: using reserved2 padding > ok 43 key del: del and set current key on a listen socket > ok 44 key del: del and set rnext key on a listen socket > ok 45 key del: del and set current+rnext key on a listen socket > ok 46 key del: bad key flags > ok 47 key del: ifindex without TCP_AO_KEYF_IFNINDEX > ok 48 key del: non-existent VRF > ok 49 key del: set non-exising current key > ok 50 key del: set non-existing rnext key > ok 51 key del: set non-existing current+rnext key > ok 52 key del: set current key > ok 53 key del: set rnext key > ok 54 key del: set current+rnext key > ok 55 key del: set as current key to be removed > ok 56 key del: set as rnext key to be removed > ok 57 key del: set as current+rnext key to be removed > ok 58 key del: async on non-listen > ok 59 key del: non-existing sndid > ok 60 key del: non-existing rcvid > ok 61 key del: incorrect addr > ok 62 key del: correct key delete > ok 63 AO info set: set current key on a listen socket > ok 64 AO info set: set rnext key on a listen socket > ok 65 AO info set: set current+rnext key on a listen socket > ok 66 AO info set: using reserved padding > ok 67 AO info set: using reserved2 padding > ok 68 AO info set: accept_icmps > ok 69 AO info get: accept_icmps > ok 70 AO info set: ao required > ok 71 AO info get: ao required > ok 72 AO info set: ao required with MD5 key > ok 73 AO info set: set non-existing current key > ok 74 AO info set: set non-existing rnext key > ok 75 AO info set: set non-existing current+rnext key > ok 76 AO info set: set current key > ok 77 AO info get: set current key > ok 78 AO info set: set rnext key > ok 79 AO info get: set rnext key > ok 80 AO info set: set current+rnext key > ok 81 AO info get: set current+rnext key > ok 82 AO info set: set counters > ok 83 AO info get: set counters > ok 84 AO info set: no-op > ok 85 AO info get: no-op > ok 86 get keys: no ao_info > ok 87 get keys: proper tcp_ao_get_mkts() > ok 88 get keys: set out-only pkt_good counter > ok 89 get keys: set out-only pkt_bad counter > ok 90 get keys: bad keyflags > ok 91 get keys: ifindex without TCP_AO_KEYF_IFNINDEX > ok 92 get keys: using reserved field > ok 93 get keys: no prefix, addr > ok 94 get keys: no prefix, any addr > ok 95 get keys: prefix, any addr > ok 96 get keys: too big prefix > ok 97 get keys: too short prefix > ok 98 get keys: prefix + addr > ok 99 get keys: get_all + prefix > ok 100 get keys: get_all + addr > ok 101 get keys: get_all + sndid > ok 102 get keys: get_all + rcvid > ok 103 get keys: current + prefix > ok 104 get keys: current + addr > ok 105 get keys: current + sndid > ok 106 get keys: current + rcvid > ok 107 get keys: rnext + prefix > ok 108 get keys: rnext + addr > ok 109 get keys: rnext + sndid > ok 110 get keys: rnext + rcvid > ok 111 get keys: get_all + current > ok 112 get keys: get_all + rnext > ok 113 get keys: current + rnext > ok 114 key add: duplicate: full copy > ok 115 key add: duplicate: any addr key on the socket > ok 116 key add: duplicate: add any addr key > ok 117 key add: duplicate: add any addr for the same subnet > ok 118 key add: duplicate: full copy of a key > ok 119 key add: duplicate: RecvID differs > ok 120 key add: duplicate: SendID differs > # Totals: pass:120 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + .../selftests/net/tcp_ao/setsockopt-closed.c | 835 +++++++++++++++++++++ 2 files changed, 836 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/setsockopt-closed.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index f3b1d7f42edb..1efd98ca12db 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -2,6 +2,7 @@ TEST_BOTH_AF := connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += setsockopt-closed TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c new file mode 100644 index 000000000000..7e4601b3f6a3 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +static union tcp_addr tcp_md5_client; + +static int test_port = 7788; +static void make_listen(int sk) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++)); + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + if (listen(sk, 1)) + test_error("listen()"); +} + +static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, + const char *tst) +{ + struct tcp_ao_info_opt tmp; + socklen_t len = sizeof(tmp); + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) + test_error("getsockopt(TCP_AO_INFO) failed"); + +#define __cmp_ao(member) \ +do { \ + if (info->member != tmp.member) { \ + test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ + tst, (size_t)info->member, (size_t)tmp.member); \ + return; \ + } \ +} while(0) + if (info->set_current) + __cmp_ao(current_key); + if (info->set_rnext) + __cmp_ao(rnext); + if (info->set_counters) { + __cmp_ao(pkt_good); + __cmp_ao(pkt_bad); + __cmp_ao(pkt_key_not_found); + __cmp_ao(pkt_ao_required); + __cmp_ao(pkt_dropped_icmp); + } + __cmp_ao(ao_required); + __cmp_ao(accept_icmps); + + test_ok("AO info get: %s", tst); +#undef __cmp_ao +} + +static void __setsockopt_checked(int sk, int optname, bool get, + void *optval, socklen_t *len, + int err, const char *tst, const char *tst2) +{ + int ret; + + if (!tst) + tst = ""; + if (!tst2) + tst2 = ""; + + errno = 0; + if (get) + ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len); + else + ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len); + if (ret == -1) { + if (errno == err) + test_ok("%s%s", tst ?: "", tst2 ?: ""); + else + test_fail("%s%s: %setsockopt() failed", + tst, tst2, get ? "g" : "s"); + close(sk); + return; + } + + if (err) { + test_fail("%s%s: %setsockopt() was expected to fail with %d", + tst, tst2, get ? "g" : "s", err); + } else { + test_ok("%s%s", tst ?: "", tst2 ?: ""); + if (optname == TCP_AO_ADD_KEY) { + test_verify_socket_key(sk, optval); + } else if (optname == TCP_AO_INFO && !get) { + test_vefify_ao_info(sk, optval, tst2); + } else if (optname == TCP_AO_GET_KEYS) { + if (*len != sizeof(struct tcp_ao_getsockopt)) + test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size", + tst, tst2); + } + } + close(sk); +} + +static void setsockopt_checked(int sk, int optname, void *optval, + int err, const char *tst) +{ + const char *cmd = NULL; + socklen_t len; + + switch (optname) { + case TCP_AO_ADD_KEY: + cmd = "key add: "; + len = sizeof(struct tcp_ao_add); + break; + case TCP_AO_DEL_KEY: + cmd = "key del: "; + len = sizeof(struct tcp_ao_del); + break; + case TCP_AO_INFO: + cmd = "AO info set: "; + len = sizeof(struct tcp_ao_info_opt); + break; + default: + break; + }; + + __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); +} + +static int prepare_defs(int cmd, void *optval) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + switch (cmd) { + case TCP_AO_ADD_KEY: { + struct tcp_ao_add *add = optval; + + if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, + -1, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + break; + } + case TCP_AO_DEL_KEY: { + struct tcp_ao_del *del = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(del, 0, sizeof(struct tcp_ao_del)); + del->sndid = 100; + del->rcvid = 100; + del->prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0); + break; + } + case TCP_AO_INFO: { + struct tcp_ao_info_opt *info = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(info, 0, sizeof(struct tcp_ao_info_opt)); + break; + } + case TCP_AO_GET_KEYS: { + struct tcp_ao_getsockopt *get = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(get, 0, sizeof(struct tcp_ao_getsockopt)); + get->nkeys = 1; + get->get_all = 1; + break; + } + default: + test_error("unknown cmd"); + } + + return sk; +} + +static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size) +{ + struct { + union { + struct tcp_ao_add add; + struct tcp_ao_del del; + struct tcp_ao_getsockopt get; + struct tcp_ao_info_opt info; + }; + char *extend[100]; + } tmp_opt; + socklen_t extended_size = sizeof(tmp_opt); + int sk; + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size, + EINVAL, tst, ": minimum size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size, + 0, tst, ": extended size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, NULL, &extended_size, + EFAULT, tst, ": null optval"); + + if (get) { + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL, + EFAULT, tst, ": null optlen"); + } +} + +static void extend_tests(void) +{ + test_extend(TCP_AO_ADD_KEY, false, "AO add", + offsetof(struct tcp_ao_add, key)); + test_extend(TCP_AO_DEL_KEY, false, "AO del", + offsetof(struct tcp_ao_del, keyflags)); + test_extend(TCP_AO_INFO, false, "AO set info", + offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp)); + test_extend(TCP_AO_INFO, true, "AO get info", -1); + test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1); +} + +static void test_optmem_limit(void) +{ + size_t i, keys_limit, current_optmem = test_get_optmem(); + struct tcp_ao_add ao; + union tcp_addr net = {}; + int sk; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP; + for (i = 0;; i++) { + union tcp_addr key_peer; + int err; + + key_peer = gen_tcp_addr(net, i + 1); + tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0); + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &ao, sizeof(ao)); + if (!err) { + /* + * TCP_AO_ADD_KEY should be the same order as the real + * sizeof(struct tcp_ao_key) in kernel. + */ + if (i <= keys_limit * 10) + continue; + test_fail("optmem limit test failed: added %zu key", i); + break; + } + if (i < keys_limit) { + test_fail("optmem limit test failed: couldn't add %zu key", i); + break; + } + test_ok("optmem limit was hit on adding %zu key", i); + break; + } + close(sk); +} + +static void test_einval_add_key(void) +{ + struct tcp_ao_add ao; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keylen = TCP_AO_MAXKEYLEN + 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding"); + + /* tcp_ao_verify_ipv{4,6}() checks */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.addr.ss_family = AF_UNIX; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 32; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 129; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 2; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags |= TCP_AO_KEYF_IFINDEX; + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF"); + /* + * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5 + * see client_vrf_tests(). + */ + + test_optmem_limit(); + + /* tcp_ao_parse_crypto() */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.maclen = 100; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + strcpy(ao.alg_name, "imaginary hash algo"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo"); +} + +static void test_einval_del_key(void) +{ + struct tcp_ao_del del; + int sk; + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags |= TCP_AO_KEYF_IFINDEX; + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-exising current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.del_async = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.sndid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.rcvid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete"); +} + +static void test_einval_ao_info(void) +{ + struct tcp_ao_info_opt info; + int sk; + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.accept_icmps = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required"); + + if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) { + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1, + "long long secret")) { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + close(sk); + } else { + setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED, + "ao required with MD5 key"); + } + } + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.current_key = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + info.current_key = 100; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_counters = 1; + info.pkt_good = 321; + info.pkt_bad = 888; + info.pkt_key_not_found = 654; + info.pkt_ao_required = 987654; + info.pkt_dropped_icmp = 10000; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters"); + + sk = prepare_defs(TCP_AO_INFO, &info); + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op"); +} + +static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval, + int err, const char *tst) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + __setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err, + "get keys: ", tst); +} + +static void test_einval_get_keys(void) +{ + struct tcp_ao_getsockopt out; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + getsockopt_checked(sk, &out, ENOENT, "no ao_info"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_good = 643; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_bad = 94; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.keyflags = (uint8_t)(-1); + getsockopt_checked(sk, &out, EINVAL, "bad keyflags"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.ifindex = 42; + getsockopt_checked(sk, &out, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.reserved = 1; + getsockopt_checked(sk, &out, EINVAL, "using reserved field"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 32; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 129; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 2; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, 0, "prefix + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "get_all + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "get_all + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "current + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "current + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "rnext + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "rnext + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_current = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + current"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rnext"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, 0, "current + rnext"); +} + +static void einval_tests(void) +{ + test_einval_add_key(); + test_einval_del_key(); + test_einval_ao_info(); + test_einval_get_keys(); +} + +static void duplicate_tests(void) +{ + union tcp_addr network_dup; + struct tcp_ao_add ao, ao2; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao2 = ao; + memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao2.prefix = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key"); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + if (test_prepare_def_key(&ao, "password", 0, network_dup, + 16, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.rcvid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.sndid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) + test_error("Can't convert ip address"); + extend_tests(); + einval_tests(); + duplicate_tests(); + /* + * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters + * returning proper nr & keys; + */ + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, client_fn, NULL); + return 0; +} -- cgit v1.2.3 From 6f0c472a681586161e4b3988243754514eef8a0d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:20 +0000 Subject: selftests/net: Add TCP-AO + TCP-MD5 + no sign listen socket tests The test plan was (most of tests have all 3 client types): 1. TCP-AO listen (INADDR_ANY) 2. TCP-MD5 listen (INADDR_ANY) 3. non-signed listen (INADDR_ANY) 4. TCP-AO + TCP-MD5 listen (prefix) 5. TCP-AO subprefix add failure [checked in setsockopt-closed.c] 6. TCP-AO out of prefix connect [checked in connect-deny.c] 7. TCP-AO + TCP-MD5 on connect() 8. TCP-AO intersect with TCP-MD5 failure 9. Established TCP-AO: add TCP-MD5 key 10. Established TCP-MD5: add TCP-AO key 11. Established non-signed: add TCP-AO key Output produced: > # ./unsigned-md5_ipv6 > 1..72 > # 1592[lib/setup.c:239] rand seed 1697567046 > TAP version 13 > ok 1 AO server (INADDR_ANY): AO client: counter TCPAOGood increased 0 => 2 > ok 2 AO server (INADDR_ANY): AO client: connected > ok 3 AO server (INADDR_ANY): MD5 client > ok 4 AO server (INADDR_ANY): MD5 client: counter TCPMD5Unexpected increased 0 => 1 > ok 5 AO server (INADDR_ANY): no sign client: counter TCPAORequired increased 0 => 1 > ok 6 AO server (INADDR_ANY): unsigned client > ok 7 AO server (AO_REQUIRED): AO client: connected > ok 8 AO server (AO_REQUIRED): AO client: counter TCPAOGood increased 4 => 6 > ok 9 AO server (AO_REQUIRED): unsigned client > ok 10 AO server (AO_REQUIRED): unsigned client: counter TCPAORequired increased 1 => 2 > ok 11 MD5 server (INADDR_ANY): AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 12 MD5 server (INADDR_ANY): AO client > ok 13 MD5 server (INADDR_ANY): MD5 client: connected > ok 14 MD5 server (INADDR_ANY): MD5 client: no counter checks > ok 15 MD5 server (INADDR_ANY): no sign client > ok 16 MD5 server (INADDR_ANY): no sign client: counter TCPMD5NotFound increased 0 => 1 > ok 17 no sign server: AO client > ok 18 no sign server: AO client: counter TCPAOKeyNotFound increased 1 => 2 > ok 19 no sign server: MD5 client > ok 20 no sign server: MD5 client: counter TCPMD5Unexpected increased 1 => 2 > ok 21 no sign server: no sign client: connected > ok 22 no sign server: no sign client: counter CurrEstab increased 0 => 1 > ok 23 AO+MD5 server: AO client (matching): connected > ok 24 AO+MD5 server: AO client (matching): counter TCPAOGood increased 8 => 10 > ok 25 AO+MD5 server: AO client (misconfig, matching MD5) > ok 26 AO+MD5 server: AO client (misconfig, matching MD5): counter TCPAOKeyNotFound increased 2 => 3 > ok 27 AO+MD5 server: AO client (misconfig, non-matching): counter TCPAOKeyNotFound increased 3 => 4 > ok 28 AO+MD5 server: AO client (misconfig, non-matching) > ok 29 AO+MD5 server: MD5 client (matching): connected > ok 30 AO+MD5 server: MD5 client (matching): no counter checks > ok 31 AO+MD5 server: MD5 client (misconfig, matching AO) > ok 32 AO+MD5 server: MD5 client (misconfig, matching AO): counter TCPMD5Unexpected increased 2 => 3 > ok 33 AO+MD5 server: MD5 client (misconfig, non-matching) > ok 34 AO+MD5 server: MD5 client (misconfig, non-matching): counter TCPMD5Unexpected increased 3 => 4 > ok 35 AO+MD5 server: no sign client (unmatched): connected > ok 36 AO+MD5 server: no sign client (unmatched): counter CurrEstab increased 0 => 1 > ok 37 AO+MD5 server: no sign client (misconfig, matching AO) > ok 38 AO+MD5 server: no sign client (misconfig, matching AO): counter TCPAORequired increased 2 => 3 > ok 39 AO+MD5 server: no sign client (misconfig, matching MD5) > ok 40 AO+MD5 server: no sign client (misconfig, matching MD5): counter TCPMD5NotFound increased 1 => 2 > ok 41 AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys: connect() was prevented > ok 42 AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys: no counter checks > ok 43 AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys: connect() was prevented > ok 44 AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys: no counter checks > ok 45 TCP-AO established: add TCP-MD5 key: postfailed as expected > ok 46 TCP-AO established: add TCP-MD5 key: counter TCPAOGood increased 12 => 14 > ok 47 TCP-MD5 established: add TCP-AO key: postfailed as expected > ok 48 TCP-MD5 established: add TCP-AO key: no counter checks > ok 49 non-signed established: add TCP-AO key: postfailed as expected > ok 50 non-signed established: add TCP-AO key: counter CurrEstab increased 0 => 1 > ok 51 TCP-AO key intersects with existing TCP-MD5 key: prefailed as expected: Key was rejected by service > ok 52 TCP-MD5 key intersects with existing TCP-AO key: prefailed as expected: Key was rejected by service > ok 53 TCP-MD5 key + TCP-AO required: prefailed as expected: Key was rejected by service > ok 54 TCP-AO required on socket + TCP-MD5 key: prefailed as expected: Key was rejected by service > ok 55 VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 56 VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 57 VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0): prefailed as expected: Key was rejected by service > ok 58 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 59 VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N): prefailed as expected: Key was rejected by service > ok 60 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 61 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 62 VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0): prefailed as expected: Key was rejected by service > ok 63 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0): prefailed as expected: Key was rejected by service > ok 64 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0): prefailed as expected: Key was rejected by service > ok 65 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N) > ok 66 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0) > ok 67 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 68 VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N): prefailed as expected: Key was rejected by service > ok 69 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0) > ok 70 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N) > ok 71 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N): prefailed as expected: Key was rejected by service > ok 72 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N): prefailed as expected: Key was rejected by service > # Totals: pass:72 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 742 ++++++++++++++++++++++ 2 files changed, 743 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/unsigned-md5.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 1efd98ca12db..ee2f1a17e805 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += setsockopt-closed +TEST_BOTH_AF += unsigned-md5 TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c new file mode 100644 index 000000000000..7cffde02d2be --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -0,0 +1,742 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) +static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; +static const char *ao_password = DEFAULT_TEST_PASSWORD; + +static union tcp_addr client2; +static union tcp_addr client3; + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + +static void try_accept(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + bool set_ao_required, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + const char *cnt_name, test_cnt cnt_expected, + int needs_tcp_md5, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(lsk, ao_password, + *ao_addr, ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (set_ao_required && test_set_ao_flags(lsk, true, false)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timeouted for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + close(lsk); + + if (!cnt_name) { + test_ok("%s: no counter checks", tst_name); + goto out; + } + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + if (ao_addr) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + +out: + synchronize_threads(); /* close() */ + if (sk > 0) + close(sk); +} + +static void server_add_routes(void) +{ + int family = TEST_FAMILY; + + synchronize_threads(); /* client_add_ips() */ + + if (ip_route_add(veth_name, family, this_ip_addr, client2)) + test_error("Failed to add route"); + if (ip_route_add(veth_name, family, this_ip_addr, client3)) + test_error("Failed to add route"); +} + +static void server_add_fail_tests(unsigned int *port) +{ + union tcp_addr addr_any = {}; + + try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, + 1, 0); + try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); +} + +static void server_vrf_tests(unsigned int *port) +{ + setup_vrfs(); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + server_add_routes(); + + try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", + TEST_CNT_GOOD, 0, 0); + try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); + try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + + try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + 0, 1, FAULT_TIMEOUT); + try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", + 0, 1, FAULT_TIMEOUT); + + try_accept("no sign server: AO client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("no sign server: MD5 client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); + + try_accept("AO+MD5 server: AO client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); + try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, 0); + try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (unmatched)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "CurrEstab", 0, 1, 0); + try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + + try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + + server_add_fail_tests(&port); + + server_vrf_tests(&port); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static int client_bind(int sk, union tcp_addr bind_addr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = bind_addr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = bind_addr.a4, + }; +#endif + return bind(sk, &addr, sizeof(addr)); +} + +static void try_connect(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (bind_addr && client_bind(sk, *bind_addr)) + test_error("bind()"); + + if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(sk, ao_password, *ao_addr, + ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) + test_ok("%s: connect() was prevented", tst_name); + else if (ret == -ETIMEDOUT && fault(TIMEOUT)) + test_ok("%s", tst_name); + else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) + test_ok("%s: refused to connect", tst_name); + else + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + +out: + synchronize_threads(); /* close() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + close(sk); +} + +#define PREINSTALL_MD5_FIRST BIT(0) +#define PREINSTALL_AO BIT(1) +#define POSTINSTALL_AO BIT(2) +#define PREINSTALL_MD5 BIT(3) +#define POSTINSTALL_MD5 BIT(4) + +static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix, + int vrf, uint8_t sndid, uint8_t rcvid, + bool set_ao_required) +{ + uint8_t keyflags = 0; + + if (vrf >= 0) + keyflags |= TCP_AO_KEYF_IFINDEX; + else + vrf = 0; + if (set_ao_required) { + int err = test_set_ao_flags(sk, true, 0); + + if (err) + return err; + } + return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix, + (uint8_t)vrf, sndid, rcvid); +} + +static bool test_continue(const char *tst_name, int err, + fault_t inj, bool added_ao) +{ + bool expected_to_fail; + + expected_to_fail = fault(PREINSTALL_AO) && added_ao; + expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao; + + if (!err) { + if (!expected_to_fail) + return true; + test_fail("%s: setsockopt()s were expected to fail", tst_name); + return false; + } + if (err != -EKEYREJECTED || !expected_to_fail) { + test_error("%s: setsockopt(%s) = %d", tst_name, + added_ao ? "TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err); + return false; + } + test_ok("%s: prefailed as expected: %m", tst_name); + return false; +} + +static int open_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + fault_t inj) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (client_bind(sk, this_ip_addr)) + test_error("bind()"); + + if (strategy & PREINSTALL_MD5_FIRST) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + + if (strategy & PREINSTALL_AO) { + int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, set_ao_required); + + if (!test_continue(tst_name, err, inj, true)) { + close(sk); + return -1; + } + } + + if (strategy & PREINSTALL_MD5) { + errno = 0; + test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password); + if (!test_continue(tst_name, -errno, inj, false)) { + close(sk); + return -1; + } + } + + return sk; +} + +static void try_to_preadd(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, int needs_vrf, fault_t inj) +{ + int sk; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, set_ao_required, + sndid, rcvid, inj); + if (sk < 0) + return; + + test_ok("%s", tst_name); + close(sk); +} + +static void try_to_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, fault_t inj) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj); + if (sk < 0) + return; + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret <= 0) { + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (strategy & POSTINSTALL_MD5) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + + if (strategy & POSTINSTALL_AO) { + if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, 0)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + +out: + synchronize_threads(); /* close() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + close(sk); +} + +static void client_add_ip(union tcp_addr *client, const char *ip) +{ + int family = TEST_FAMILY; + + if (inet_pton(family, ip, client) != 1) + test_error("Can't convert ip address %s", ip); + + if (ip_addr_add(veth_name, family, *client, TEST_PREFIX)) + test_error("Failed to add ip address"); + if (ip_route_add(veth_name, family, *client, this_ip_dest)) + test_error("Failed to add route"); +} + +static void client_add_ips(void) +{ + client_add_ip(&client2, __TEST_CLIENT_IP(2)); + client_add_ip(&client3, __TEST_CLIENT_IP(3)); + synchronize_threads(); /* server_add_routes() */ +} + +static void client_add_fail_tests(unsigned int *port) +{ + try_to_add("TCP-AO established: add TCP-MD5 key", + (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("TCP-MD5 established: add TCP-AO key", + (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("non-signed established: add TCP-AO key", + (*port)++, POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 0, FAULT_POSTINSTALL); + + try_to_add("TCP-AO key intersects with existing TCP-MD5 key", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_AO); + try_to_add("TCP-MD5 key intersects with existing TCP-AO key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_MD5); + + try_to_preadd("TCP-MD5 key + TCP-AO required", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_AO); + try_to_preadd("TCP-AO required on socket + TCP-MD5 key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_MD5); +} + +static void client_vrf_tests(unsigned int *port) +{ + setup_vrfs(); + + /* The following restrictions for setsockopt()s are expected: + * + * |--------------|-----------------|-------------|-------------| + * | | MD5 key without | MD5 key | MD5 key | + * | | l3index | l3index=0 | l3index=N | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | without | reject | reject | reject | + * | l3index | | | | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=0 | reject | reject | allow | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=N | reject | allow | reject | + * |--------------|-----------------|-------------|-------------| + */ + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + client_add_ips(); + + try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + + try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + + try_connect("no sign server: AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("no sign server: MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr); + + try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 1, &client2); + try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO+MD5 server: AO client (misconfig, non-matching)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &client3); + try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client3); + try_connect("AO+MD5 server: no sign client (unmatched)", + port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &this_ip_addr); + + try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &this_ip_addr); + try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &client2); + + client_add_fail_tests(&port); + client_vrf_tests(&port); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(72, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From d1066c9c58d48bdbda0236b4744dc03f8a903d49 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:21 +0000 Subject: selftests/net: Add test/benchmark for removing MKTs Sample output: > 1..36 > # 1106[lib/setup.c:207] rand seed 1660754406 > TAP version 13 > ok 1 Worst case connect 512 keys: min=0ms max=1ms mean=0.583329ms stddev=0.076376 > ok 2 Connect random-search 512 keys: min=0ms max=1ms mean=0.53412ms stddev=0.0516779 > ok 3 Worst case delete 512 keys: min=2ms max=11ms mean=6.04139ms stddev=0.245792 > ok 4 Add a new key 512 keys: min=0ms max=13ms mean=0.673415ms stddev=0.0820618 > ok 5 Remove random-search 512 keys: min=5ms max=9ms mean=6.65969ms stddev=0.258064 > ok 6 Remove async 512 keys: min=0ms max=0ms mean=0.041825ms stddev=0.0204512 > ok 7 Worst case connect 1024 keys: min=0ms max=2ms mean=0.520357ms stddev=0.0721358 > ok 8 Connect random-search 1024 keys: min=0ms max=2ms mean=0.535312ms stddev=0.0517355 > ok 9 Worst case delete 1024 keys: min=5ms max=9ms mean=8.27219ms stddev=0.287614 > ok 10 Add a new key 1024 keys: min=0ms max=1ms mean=0.688121ms stddev=0.0829531 > ok 11 Remove random-search 1024 keys: min=5ms max=9ms mean=8.37649ms stddev=0.289422 > ok 12 Remove async 1024 keys: min=0ms max=0ms mean=0.0457096ms stddev=0.0213798 > ok 13 Worst case connect 2048 keys: min=0ms max=2ms mean=0.748804ms stddev=0.0865335 > ok 14 Connect random-search 2048 keys: min=0ms max=2ms mean=0.782993ms stddev=0.0625697 > ok 15 Worst case delete 2048 keys: min=5ms max=10ms mean=8.23106ms stddev=0.286898 > ok 16 Add a new key 2048 keys: min=0ms max=1ms mean=0.812988ms stddev=0.0901658 > ok 17 Remove random-search 2048 keys: min=8ms max=9ms mean=8.84949ms stddev=0.297481 > ok 18 Remove async 2048 keys: min=0ms max=0ms mean=0.0297223ms stddev=0.0172402 > ok 19 Worst case connect 4096 keys: min=1ms max=5ms mean=1.53352ms stddev=0.123836 > ok 20 Connect random-search 4096 keys: min=1ms max=5ms mean=1.52226ms stddev=0.0872429 > ok 21 Worst case delete 4096 keys: min=5ms max=9ms mean=8.25874ms stddev=0.28738 > ok 22 Add a new key 4096 keys: min=0ms max=3ms mean=1.67382ms stddev=0.129376 > ok 23 Remove random-search 4096 keys: min=5ms max=10ms mean=8.26178ms stddev=0.287433 > ok 24 Remove async 4096 keys: min=0ms max=0ms mean=0.0340009ms stddev=0.0184393 > ok 25 Worst case connect 8192 keys: min=2ms max=4ms mean=2.86208ms stddev=0.169177 > ok 26 Connect random-search 8192 keys: min=2ms max=4ms mean=2.87592ms stddev=0.119915 > ok 27 Worst case delete 8192 keys: min=6ms max=11ms mean=7.55291ms stddev=0.274826 > ok 28 Add a new key 8192 keys: min=1ms max=5ms mean=2.56797ms stddev=0.160249 > ok 29 Remove random-search 8192 keys: min=5ms max=10ms mean=7.14002ms stddev=0.267208 > ok 30 Remove async 8192 keys: min=0ms max=0ms mean=0.0320066ms stddev=0.0178904 > ok 31 Worst case connect 16384 keys: min=5ms max=6ms mean=5.55334ms stddev=0.235655 > ok 32 Connect random-search 16384 keys: min=5ms max=6ms mean=5.52614ms stddev=0.166225 > ok 33 Worst case delete 16384 keys: min=5ms max=11ms mean=7.39109ms stddev=0.271866 > ok 34 Add a new key 16384 keys: min=2ms max=4ms mean=3.35799ms stddev=0.183248 > ok 35 Remove random-search 16384 keys: min=5ms max=8ms mean=6.86078ms stddev=0.261931 > ok 36 Remove async 16384 keys: min=0ms max=0ms mean=0.0302384ms stddev=0.0173892 > # Totals: pass:36 fail:0 xfail:0 xpass:0 skip:0 error:0 >From the output it's visible that the current simplified approach with linked-list of MKTs scales quite fine even for thousands of keys. And that also means that the majority of the time for delete is eaten by synchronize_rcu() [which I can confirm separately by tracing]. Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 5 +- tools/testing/selftests/net/tcp_ao/bench-lookups.c | 358 +++++++++++++++++++++ 2 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/tcp_ao/bench-lookups.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index ee2f1a17e805..f0b218b99506 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_BOTH_AF := connect +TEST_BOTH_AF := bench-lookups +TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += setsockopt-closed @@ -49,3 +50,5 @@ $(OUTPUT)/%_ipv6: %.c $(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT $(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/bench-lookups_ipv4: LDFLAGS+= -lm +$(OUTPUT)/bench-lookups_ipv6: LDFLAGS+= -lm diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c new file mode 100644 index 000000000000..7be8a7d9308c --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include +#include +#include +#include +#include + +#include "../../../../include/linux/bits.h" +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */ + +static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand) +{ + union tcp_addr net = {}; + size_t i, j; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + if (!use_rand) { + for (i = 0; i < ips_nr; i++) + ips[i] = gen_tcp_addr(net, 2 * i + 1); + return; + } + for (i = 0; i < ips_nr; i++) { + size_t r = (size_t)random() | 0x1; + + ips[i] = gen_tcp_addr(net, r); + + for (j = i - 1; j > 0 && i > 0; j--) { + if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) { + i--; /* collision */ + break; + } + } + } +} + +static void test_add_routes(union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p)) + test_error("Failed to add route"); + } +} + +static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); + } +} + +static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 }; +static union tcp_addr *test_ips; + +struct bench_stats { + uint64_t min; + uint64_t max; + uint64_t nr; + double mean; + double s2; +}; + +static struct bench_tests { + struct bench_stats delete_last_key; + struct bench_stats add_key; + struct bench_stats delete_rand_key; + struct bench_stats connect_last_key; + struct bench_stats connect_rand_key; + struct bench_stats delete_async; +} bench_results[ARRAY_SIZE(nr_keys)]; + +#define NSEC_PER_SEC 1000000000ULL + +static void measure_call(struct bench_stats *st, + void (*f)(int, void *), int sk, void *arg) +{ + struct timespec start = {}, end = {}; + double delta; + uint64_t nsec; + + if (clock_gettime(CLOCK_MONOTONIC, &start)) + test_error("clock_gettime()"); + + f(sk, arg); + + if (clock_gettime(CLOCK_MONOTONIC, &end)) + test_error("clock_gettime()"); + + nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC; + if (end.tv_nsec >= start.tv_nsec) + nsec += end.tv_nsec - start.tv_nsec; + else + nsec -= start.tv_nsec - end.tv_nsec; + + if (st->nr == 0) { + st->min = st->max = nsec; + } else { + if (st->min > nsec) + st->min = nsec; + if (st->max < nsec) + st->max = nsec; + } + + /* Welford-Knuth algorithm */ + st->nr++; + delta = (double)nsec - st->mean; + st->mean += delta / st->nr; + st->s2 += delta * ((double)nsec - st->mean); +} + +static void delete_mkt(int sk, void *arg) +{ + struct tcp_ao_del *ao = arg; + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao))) + test_error("setsockopt(TCP_AO_DEL_KEY)"); +} + +static void add_back_mkt(int sk, void *arg) +{ + union tcp_addr *p = arg; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); +} + +static void bench_delete(int lsk, struct bench_stats *add, + struct bench_stats *del, + union tcp_addr *ips, size_t ips_nr, + bool rand_order, bool async) +{ + struct tcp_ao_del ao_del = {}; + union tcp_addr *p; + size_t i; + + ao_del.sndid = 100; + ao_del.rcvid = 100; + ao_del.del_async = !!async; + ao_del.prefix = DEFAULT_TEST_PREFIX; + + /* Remove the first added */ + p = (union tcp_addr *)&ips[0]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + + for (i = 0; i < BENCH_NR_ITERS; i++) { + measure_call(del, delete_mkt, lsk, (void *)&ao_del); + + /* Restore it back */ + measure_call(add, add_back_mkt, lsk, (void *)p); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. + */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + } +} + +static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk; + + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + close(sk); + } +} + +static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs) +{ + test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g", + desc, nr, bs->min / 1000000, bs->max / 1000000, + bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr)); +} + +static void *server_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + int lsk; + + test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr)); + if (!test_ips) + test_error("malloc()"); + + lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1); + + gen_test_ips(test_ips, nr_keys[i], false); + test_add_routes(test_ips, nr_keys[i]); + test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]); + server_apply_keys(lsk, test_ips, nr_keys[i]); + + synchronize_threads(); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + + /* The worst case for FILO-list */ + bench_delete(lsk, &bt->add_key, &bt->delete_last_key, + test_ips, nr_keys[i], false, false); + test_print_stats("Add a new key", + nr_keys[i], &bt->add_key); + test_print_stats("Delete: worst case", + nr_keys[i], &bt->delete_last_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_rand_key, + test_ips, nr_keys[i], true, false); + test_print_stats("Delete: random-search", + nr_keys[i], &bt->delete_rand_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_async, + test_ips, nr_keys[i], false, true); + test_print_stats("Delete: async", nr_keys[i], &bt->delete_async); + + free(test_ips); + close(lsk); + } + + return NULL; +} + +static void connect_client(int sk, void *arg) +{ + size_t *p = arg; + + if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0) + test_error("failed to connect()"); +} + +static void client_addr_setup(int sk, union tcp_addr taddr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = taddr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = taddr.a4, + }; +#endif + int ret; + + ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX); + if (ret && ret != -EEXIST) + test_error("Failed to add ip address"); + ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest); + if (ret && ret != -EEXIST) + test_error("Failed to add route"); + + if (bind(sk, &addr, sizeof(addr))) + test_error("bind()"); +} + +static void bench_connect_client(size_t port_off, struct bench_tests *bt, + union tcp_addr *ips, size_t ips_nr, bool rand_order) +{ + struct bench_stats *con; + union tcp_addr *p; + size_t i; + + if (rand_order) + con = &bt->connect_rand_key; + else + con = &bt->connect_last_key; + + p = (union tcp_addr *)&ips[0]; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + client_addr_setup(sk, *p); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + + measure_call(con, connect_client, sk, (void *)&port_off); + + close(sk); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. + */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + } +} + +static void *client_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + + synchronize_threads(); + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: worst case", + nr_keys[i], &bt->connect_last_key); + + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: random-search", + nr_keys[i], &bt->connect_last_key); + } + synchronize_threads(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(30, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From 3715d32dc97698e6d2f59b1579577178a1361686 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:22 +0000 Subject: selftests/net: Add TCP_REPAIR TCP-AO tests The test plan is: 1. check that TCP-AO connection may be restored on another socket 2. check restore with wrong send/recv ISN (checking that they are part of MAC generation) 3. check restore with wrong SEQ number extension (checking that high bytes of it taken into MAC generation) Sample output expected: > # ./restore_ipv4 > 1..20 > # 1412[lib/setup.c:254] rand seed 1686610825 > TAP version 13 > ok 1 TCP-AO migrate to another socket: server alive > ok 2 TCP-AO migrate to another socket: post-migrate connection is alive > ok 3 TCP-AO migrate to another socket: counter TCPAOGood increased 23 => 44 > ok 4 TCP-AO migrate to another socket: counter TCPAOGood increased 22 => 42 > ok 5 TCP-AO with wrong send ISN: server couldn't serve > ok 6 TCP-AO with wrong send ISN: post-migrate connection is broken > ok 7 TCP-AO with wrong send ISN: counter TCPAOBad increased 0 => 4 > ok 8 TCP-AO with wrong send ISN: counter TCPAOBad increased 0 => 3 > ok 9 TCP-AO with wrong receive ISN: server couldn't serve > ok 10 TCP-AO with wrong receive ISN: post-migrate connection is broken > ok 11 TCP-AO with wrong receive ISN: counter TCPAOBad increased 4 => 8 > ok 12 TCP-AO with wrong receive ISN: counter TCPAOBad increased 5 => 10 > ok 13 TCP-AO with wrong send SEQ ext number: server couldn't serve > ok 14 TCP-AO with wrong send SEQ ext number: post-migrate connection is broken > ok 15 TCP-AO with wrong send SEQ ext number: counter TCPAOBad increased 9 => 10 > ok 16 TCP-AO with wrong send SEQ ext number: counter TCPAOBad increased 11 => 19 > ok 17 TCP-AO with wrong receive SEQ ext number: post-migrate connection is broken > ok 18 TCP-AO with wrong receive SEQ ext number: server couldn't serve > ok 19 TCP-AO with wrong receive SEQ ext number: counter TCPAOBad increased 10 => 18 > ok 20 TCP-AO with wrong receive SEQ ext number: counter TCPAOBad increased 20 => 23 > # Totals: pass:20 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/restore.c | 236 +++++++++++++++++++++++++++ 2 files changed, 237 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/restore.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index f0b218b99506..aa11a855c3e0 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := bench-lookups TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += restore TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c new file mode 100644 index 000000000000..8fdc808df325 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +#define fault(type) (inj == FAULT_ ## type) + +static void try_server_run(const char *tst_name, unsigned int port, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + int sk, lsk; + time_t timeout; + ssize_t bytes; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + goto out; + } + + before_cnt = netstat_get_one(cnt_name, NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + bytes = test_server_run(sk, quota, timeout); + if (fault(TIMEOUT)) { + if (bytes > 0) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server couldn't serve", tst_name); + } else { + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + + /* + * Before close() as that will send FIN and move the peer in TCP_CLOSE + * and that will prevent reading AO counters from the peer's socket. + */ + synchronize_threads(); /* 3: verified => closed */ +out: + close(sk); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + try_server_run("TCP-AO migrate to another socket", port++, + 0, TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong send ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong send SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, + struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("pre-migrate verify failed"); + + test_enable_repair(sk); + test_sock_checkpoint(sk, img, saddr); + test_ao_checkpoint(sk, ao_img); + test_kill_sk(sk); +} + +static void test_sk_restore(const char *tst_name, unsigned int server_port, + sockaddr_af *saddr, struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + time_t timeout; + int sk; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + + before_cnt = netstat_get_one(cnt_name, NULL); + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, this_ip_dest, server_port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (fault(TIMEOUT)) + test_ok("%s: post-migrate connection is broken", tst_name); + else + test_fail("%s: post-migrate connection is working", tst_name); + } else { + if (fault(TIMEOUT)) + test_fail("%s: post-migrate connection still working", tst_name); + else + test_ok("%s: post-migrate connection is alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + synchronize_threads(); /* 3: verified => closed */ + close(sk); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + struct tcp_sock_state tcp_img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + test_sk_restore("TCP-AO migrate to another socket", port++, + &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snt_isn += 1; + test_sk_restore("TCP-AO with wrong send ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_isn += 1; + test_sk_restore("TCP-AO with wrong receive ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snd_sne += 1; + test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_sne += 1; + test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_GOOD | TEST_CNT_BAD); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(20, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From 0d16eae57456bbbd7eee45caee53f8d6c4d7ea17 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:23 +0000 Subject: selftests/net: Add SEQ number extension test Check that on SEQ number wraparound there is no disruption or TCPAOBad segments produced. Sample of expected output: > # ./seq-ext_ipv4 > 1..7 > # 1436[lib/setup.c:254] rand seed 1686611079 > TAP version 13 > ok 1 server alive > ok 2 post-migrate connection alive > ok 3 TCPAOGood counter increased 1002 => 3002 > ok 4 TCPAOGood counter increased 1003 => 3003 > ok 5 TCPAOBad counter didn't increase > ok 6 TCPAOBad counter didn't increase > ok 7 SEQ extension incremented: 1/1999, 1/998999 > # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/seq-ext.c | 245 +++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/seq-ext.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index aa11a855c3e0..5408c7233460 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -4,6 +4,7 @@ TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore +TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c new file mode 100644 index 000000000000..ad4e77d6823e --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check that after SEQ number wrap-around: + * 1. SEQ-extension has upper bytes set + * 2. TCP conneciton is alive and no TCPAOBad segments + * In order to test (2), the test doesn't just adjust seq number for a queue + * on a connected socket, but migrates it to another sk+port number, so + * that there won't be any delayed packets that will fail to verify + * with the new SEQ numbers. + */ +#include +#include "aolib.h" + +const unsigned int nr_packets = 1000; +const unsigned int msg_len = 1000; +const unsigned int quota = nr_packets * msg_len; +unsigned int client_new_port; + +/* Move them closer to roll-over */ +static void test_adjust_seqs(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + bool server) +{ + uint32_t new_seq1, new_seq2; + + /* make them roll-over during quota, but on different segments */ + if (server) { + new_seq1 = ((uint32_t)-1) - msg_len; + new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len); + } else { + new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len); + new_seq2 = ((uint32_t)-1) - msg_len; + } + + img->in.seq = new_seq1; + img->trw.snd_wl1 = img->in.seq - msg_len; + img->out.seq = new_seq2; + img->trw.rcv_wup = img->in.seq; +} + +static int test_sk_restore(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, sockaddr_af *saddr, + const union tcp_addr daddr, unsigned int dport, + struct tcp_ao_counters *cnt) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, daddr, dport); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, cnt)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + return sk; +} + +static void *server_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + goto out; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, true); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + client_new_port, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + } else { + test_ok("server alive"); + } + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + test_enable_repair(sk); + test_ao_checkpoint(sk, &ao_img); + if (ao_img.snd_sne && ao_img.rcv_sne) { + test_ok("SEQ extension incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } else { + test_fail("SEQ extension was not incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } + + synchronize_threads(); /* 6: verified => closed */ +out: + close(sk); + return NULL; +} + +static void *client_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("pre-migrate verify failed"); + return NULL; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + client_new_port = ntohs(saddr.sin6_port) + 1; + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + client_new_port = ntohs(saddr.sin_port) + 1; + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, false); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + test_server_port + 1, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("post-migrate verify failed"); + else + test_ok("post-migrate connection alive"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + + synchronize_threads(); /* 6: verified => closed */ + close(sk); + + synchronize_threads(); /* don't race to exit: let server exit() */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(7, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From c6df7b2361d721f40610df5c832ea0fa73e918b1 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:24 +0000 Subject: selftests/net: Add TCP-AO RST test Check that both active and passive reset works and correctly sign segments with TCP-AO or don't send RSTs if not possible to sign. A listening socket with backlog = 0 gets one connection in accept queue, another in syn queue. Once the server/listener socket is forcibly closed, client sockets aren't connected to anything. In regular situation they would receive RST on any segment, but with TCP-AO as there's no listener, no AO-key and unknown ISNs, no RST should be sent. And "passive" reset, where RST is sent on reply for some segment (tcp_v{4,6}_send_reset()) - there use TCP_REPAIR to corrupt SEQ numbers, which later results in TCP-AO signed RST, which will be verified and client socket will get EPIPE. No TCPAORequired/TCPAOBad segments are expected during these tests. Sample of the output: > # ./rst_ipv4 > 1..15 > # 1462[lib/setup.c:254] rand seed 1686611171 > TAP version 13 > ok 1 servered 1000 bytes > ok 2 Verified established tcp connection > ok 3 sk[0] = 7, connection was reset > ok 4 sk[1] = 8, connection was reset > ok 5 sk[2] = 9 > ok 6 MKT counters are good on server > ok 7 Verified established tcp connection > ok 8 client connection broken post-seq-adjust > ok 9 client connection was reset > ok 10 No segments without AO sign (server) > ok 11 Signed AO segments (server): 0 => 30 > ok 12 No segments with bad AO sign (server) > ok 13 No segments without AO sign (client) > ok 14 Signed AO segments (client): 0 => 30 > ok 15 No segments with bad AO sign (client) > # Totals: pass:15 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/lib/sock.c | 2 +- tools/testing/selftests/net/tcp_ao/rst.c | 415 ++++++++++++++++++++++++++ 3 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/tcp_ao/rst.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 5408c7233460..1d4f7576d774 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -4,6 +4,7 @@ TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore +TEST_BOTH_AF += rst TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 7f3c31b7d997..c75d82885a2e 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -566,7 +566,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); if (got <= 0) - test_error("recv(): %zd", got); + return i; bytes += got; } while (bytes < sent); if (bytes > sent) diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c new file mode 100644 index 000000000000..ac06009a7f5f --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t quota = 1000; +/* + * Backlog == 0 means 1 connection in queue, see: + * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") + */ +const unsigned int backlog; + +static void netstats_check(struct netstat *before, struct netstat *after, + char *msg) +{ + uint64_t before_cnt, after_cnt; + + before_cnt = netstat_get(before, "TCPAORequired", NULL); + after_cnt = netstat_get(after, "TCPAORequired", NULL); + if (after_cnt > before_cnt) + test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments without AO sign (%s)", msg); + + before_cnt = netstat_get(before, "TCPAOGood", NULL); + after_cnt = netstat_get(after, "TCPAOGood", NULL); + if (after_cnt <= before_cnt) + test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + + before_cnt = netstat_get(before, "TCPAOBad", NULL); + after_cnt = netstat_get(after, "TCPAOBad", NULL); + if (after_cnt > before_cnt) + test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments with bad AO sign (%s)", msg); +} + +/* + * Another way to send RST, but not through tcp_v{4,6}_send_reset() + * is tcp_send_active_reset(), that is not in reply to inbound segment, + * but rather active send. It uses tcp_transmit_skb(), so that should + * work, but as it also sends RST - nice that it can be covered as well. + */ +static void close_forced(int sk) +{ + struct linger sl; + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + test_error("setsockopt(SO_LINGER)"); + close(sk); +} + +static int test_wait_for_exception(int sk, time_t sec) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set efds; + int ret; + + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + ret = select(sk + 1, NULL, NULL, &efds, ptv); + if (ret < 0) + return -errno; + return ret ? sk : 0; +} + +static void test_server_active_rst(unsigned int port) +{ + struct tcp_ao_counters cnt1, cnt2; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, port, backlog); + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_get_tcp_ao_counters(lsk, &cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 1: MKT added */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + if (test_get_tcp_ao_counters(lsk, &cnt2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: close listen socket */ + close(lsk); + bytes = test_server_run(sk, quota, 0); + if (bytes != quota) + test_error("servered only %zd bytes", bytes); + else + test_ok("servered %zd bytes", bytes); + + synchronize_threads(); /* 4: finishing up */ + close_forced(sk); + + synchronize_threads(); /* 5: closed active sk */ + + synchronize_threads(); /* 6: counters checks */ + if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + test_fail("MKT counters (server) have not only good packets"); + else + test_ok("MKT counters are good on server"); +} + +static void test_server_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned %zd", bytes); + } + + synchronize_threads(); /* 3: chekpoint/restore the connection */ + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 4: terminate server + send more on client */ + bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); + close(sk); + test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + + synchronize_threads(); /* 5: verified => closed */ + close(sk); +} + +static void *server_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_server_active_rst(port++); + test_server_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "server"); + netstat_free(ns_after); + netstat_free(ns_before); + synchronize_threads(); /* exit */ + + synchronize_threads(); /* don't race to exit() - client exits */ + return NULL; +} + +static int test_wait_fds(int sk[], size_t nr, bool is_writable[], + ssize_t wait_for, time_t sec) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set left; + size_t i; + int ret; + + FD_ZERO(&left); + for (i = 0; i < nr; i++) { + FD_SET(sk[i], &left); + if (is_writable) + is_writable[i] = false; + } + + if (sec) + ptv = &tv; + + do { + bool is_empty = true; + fd_set fds, efds; + int nfd = 0; + + FD_ZERO(&fds); + FD_ZERO(&efds); + for (i = 0; i < nr; i++) { + if (!FD_ISSET(sk[i], &left)) + continue; + + if (sk[i] > nfd) + nfd = sk[i]; + + FD_SET(sk[i], &fds); + FD_SET(sk[i], &efds); + is_empty = false; + } + if (is_empty) + return -ENOENT; + + errno = 0; + ret = select(nfd + 1, NULL, &fds, &efds, ptv); + if (ret < 0) + return -errno; + if (!ret) + return -ETIMEDOUT; + for (i = 0; i < nr; i++) { + if (FD_ISSET(sk[i], &fds)) { + if (is_writable) + is_writable[i] = true; + FD_CLR(sk[i], &left); + wait_for--; + continue; + } + if (FD_ISSET(sk[i], &efds)) { + FD_CLR(sk[i], &left); + wait_for--; + } + } + } while (wait_for > 0); + + return 0; +} + +static void test_client_active_rst(unsigned int port) +{ + /* one in queue, another accept()ed */ + unsigned int wait_for = backlog + 2; + int i, sk[3], err; + bool is_writable[ARRAY_SIZE(sk)] = {false}; + unsigned int last = ARRAY_SIZE(sk) - 1; + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk[i] < 0) + test_error("socket()"); + if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + synchronize_threads(); /* 1: MKT added */ + for (i = 0; i < last; i++) { + err = _test_connect_socket(sk[i], this_ip_dest, port, + (i == 0) ? TEST_TIMEOUT_SEC : -1); + + if (err < 0) + test_error("failed to connect()"); + } + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("test_wait_fds(): %d", err); + + synchronize_threads(); /* 3: close listen socket */ + if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 4: finishing up */ + err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + if (err < 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 5: closed active sk */ + err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL, + wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("select(): %d", err); + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + socklen_t slen = sizeof(err); + + if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (is_writable[i] && err != ECONNRESET) { + test_fail("sk[%d] = %d, err = %d, connection wasn't reset", + i, sk[i], err); + } else { + test_ok("sk[%d] = %d%s", i, sk[i], + is_writable[i] ? ", connection was reset" : ""); + } + } + synchronize_threads(); /* 6: counters checks */ +} + +static void test_client_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af saddr; + int sk, err; + socklen_t slen = sizeof(err); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 3: chekpoint/restore the connection */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + img.out.seq += quota; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, &img, &saddr, this_ip_dest, port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, &ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(&img); + + synchronize_threads(); /* 4: terminate server + send more on client */ + if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) + test_ok("client connection broken post-seq-adjust"); + else + test_fail("client connection still works post-seq-adjust"); + + test_wait_for_exception(sk, TEST_TIMEOUT_SEC); + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (err != ECONNRESET && err != EPIPE) + test_fail("client connection was not reset: %d", err); + else + test_ok("client connection was reset"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 5: verified => closed */ + close(sk); + test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); +} + +static void *client_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_client_active_rst(port++); + test_client_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "client"); + netstat_free(ns_after); + netstat_free(ns_before); + + synchronize_threads(); /* exit */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(15, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From 8c4e8dd0c047db7c68be01e6c30ada320b8babbc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:25 +0000 Subject: selftests/net: Add TCP-AO selfconnect/simultaneous connect test Check that a rare functionality of TCP named self-connect works with TCP-AO. This "under the cover" also checks TCP simultaneous connect (TCP_SYN_RECV socket state), which would be harder to check other ways. In order to verify that it's indeed TCP simultaneous connect, check the counters TCPChallengeACK and TCPSYNChallenge. Sample of the output: > # ./self-connect_ipv6 > 1..4 > # 1738[lib/setup.c:254] rand seed 1696451931 > TAP version 13 > ok 1 self-connect(same keyids): connect TCPAOGood 0 => 24 > ok 2 self-connect(different keyids): connect TCPAOGood 26 => 50 > ok 3 self-connect(restore): connect TCPAOGood 52 => 97 > ok 4 self-connect(restore, different keyids): connect TCPAOGood 99 => 144 > # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/self-connect.c | 197 ++++++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/self-connect.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 1d4f7576d774..9286f9b99c86 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -5,6 +5,7 @@ TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore TEST_BOTH_AF += rst +TEST_BOTH_AF += self-connect TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c new file mode 100644 index 000000000000..e154d9e198a9 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +static union tcp_addr local_addr; + +static void __setup_lo_intf(const char *lo_intf, + const char *addr_str, uint8_t prefix) +{ + if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1) + test_error("Can't convert local ip address"); + + if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix)) + test_error("Failed to add %s ip address", lo_intf); + + if (link_set_up(lo_intf)) + test_error("Failed to bring %s up", lo_intf); +} + +static void setup_lo_intf(const char *lo_intf) +{ +#ifdef IPV6_TEST + __setup_lo_intf(lo_intf, "::1", 128); +#else + __setup_lo_intf(lo_intf, "127.0.0.1", 8); +#endif +} + +static void tcp_self_connect(const char *tst, unsigned int port, + bool different_keyids, bool check_restore) +{ + uint64_t before_challenge_ack, after_challenge_ack; + uint64_t before_syn_challenge, after_syn_challenge; + struct tcp_ao_counters before_ao, after_ao; + uint64_t before_aogood, after_aogood; + struct netstat *ns_before, *ns_after; + const size_t nr_packets = 20; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af addr; + int sk; + + tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port)); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (different_keyids) { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); + before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &before_ao)) + test_error("test_get_tcp_ao_counters()"); + + if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, + sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + test_error("failed to connect()"); + } + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); + after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &after_ao)) + test_error("test_get_tcp_ao_counters()"); + if (!check_restore) { + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + } + netstat_free(ns_after); + + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + close(sk); + return; + } + if (after_challenge_ack <= before_challenge_ack || + after_syn_challenge <= before_syn_challenge) { + /* + * It's also meant to test simultaneous open, so check + * these counters as well. + */ + test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", + tst, after_challenge_ack, before_challenge_ack, + after_syn_challenge, before_syn_challenge); + close(sk); + return; + } + + if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + close(sk); + return; + } + + if (!check_restore) { + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); + close(sk); + return; + } + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &addr); +#ifdef IPV6_TEST + addr.sin6_port = htons(port + 1); +#else + addr.sin_port = htons(port + 1); +#endif + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr)); + if (different_keyids) { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + test_ao_restore(sk, &ao_img); + test_disable_repair(sk); + test_sock_state_free(&img); + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + netstat_free(ns_after); + close(sk); + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + return; + } + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_lo_intf("lo"); + + tcp_self_connect("self-connect(same keyids)", port++, false, false); + tcp_self_connect("self-connect(different keyids)", port++, true, false); + tcp_self_connect("self-connect(restore)", port, false, true); + port += 2; + tcp_self_connect("self-connect(restore, different keyids)", port, true, true); + port += 2; + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(4, client_fn, NULL); + return 0; +} -- cgit v1.2.3 From 3c3ead55564825975cc40e59bfaf6c4834ea9745 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:26 +0000 Subject: selftests/net: Add TCP-AO key-management test Check multiple keys on a socket: - rotation on closed socket - current/rnext operations shouldn't be possible on listen sockets - current/rnext key set should be the one, that's used on connect() - key rotations with pseudo-random generated keys - copying matching keys on connect() and on accept() At this moment there are 3 tests that are "expected" to fail: a kernel fix is needed to improve the situation, they are marked XFAIL. Sample output: > # ./key-management_ipv4 > 1..120 > # 1601[lib/setup.c:239] rand seed 1700526653 > TAP version 13 > ok 1 closed socket, delete a key: the key was deleted > ok 2 closed socket, delete all keys: the key was deleted > ok 3 closed socket, delete current key: key deletion was prevented > ok 4 closed socket, delete rnext key: key deletion was prevented > ok 5 closed socket, delete a key + set current/rnext: the key was deleted > ok 6 closed socket, force-delete current key: the key was deleted > ok 7 closed socket, force-delete rnext key: the key was deleted > ok 8 closed socket, delete current+rnext key: key deletion was prevented > ok 9 closed socket, add + change current key > ok 10 closed socket, add + change rnext key > ok 11 listen socket, delete a key: the key was deleted > ok 12 listen socket, delete all keys: the key was deleted > ok 13 listen socket, setting current key not allowed > ok 14 listen socket, setting rnext key not allowed > ok 15 # XFAIL listen() after current/rnext keys set: the socket has current/rnext keys: 100:200 > ok 16 # XFAIL listen socket, delete current key from before listen(): failed to delete the key 100:100 -16 > ok 17 # XFAIL listen socket, delete rnext key from before listen(): failed to delete the key 200:200 -16 > ok 18 listen socket, getsockopt(TCP_AO_REPAIR) is restricted > ok 19 listen socket, setsockopt(TCP_AO_REPAIR) is restricted > ok 20 listen socket, delete a key + set current/rnext: key deletion was prevented > ok 21 listen socket, force-delete current key: key deletion was prevented > ok 22 listen socket, force-delete rnext key: key deletion was prevented > ok 23 listen socket, delete a key: the key was deleted > ok 24 listen socket, add + change current key > ok 25 listen socket, add + change rnext key > ok 26 server: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 27 client: Check current/rnext keys unset before connect(): current key 19 as expected > ok 28 client: Check current/rnext keys unset before connect(): rnext key 146 as expected > ok 29 server: Check current/rnext keys unset before connect(): server alive > ok 30 server: Check current/rnext keys unset before connect(): passed counters checks > ok 31 client: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 32 server: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 33 server: Check current/rnext keys unset before connect(): passed counters checks > ok 34 client: Check current/rnext keys unset before connect(): passed counters checks > ok 35 server: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 36 server: Check current/rnext keys set before connect(): server alive > ok 37 server: Check current/rnext keys set before connect(): passed counters checks > ok 38 client: Check current/rnext keys set before connect(): current key 10 as expected > ok 39 client: Check current/rnext keys set before connect(): rnext key 137 as expected > ok 40 server: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 41 client: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 42 client: Check current/rnext keys set before connect(): passed counters checks > ok 43 server: Check current/rnext keys set before connect(): passed counters checks > ok 44 server: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 45 server: Check current != rnext keys set before connect(): server alive > ok 46 server: Check current != rnext keys set before connect(): passed counters checks > ok 47 client: Check current != rnext keys set before connect(): current key 10 as expected > ok 48 client: Check current != rnext keys set before connect(): rnext key 132 as expected > ok 49 server: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 50 client: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 51 client: Check current != rnext keys set before connect(): passed counters checks > ok 52 server: Check current != rnext keys set before connect(): passed counters checks > ok 53 server: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 54 server: Check current flapping back on peer's RnextKey request: server alive > ok 55 server: Check current flapping back on peer's RnextKey request: passed counters checks > ok 56 client: Check current flapping back on peer's RnextKey request: current key 10 as expected > ok 57 client: Check current flapping back on peer's RnextKey request: rnext key 132 as expected > ok 58 server: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 59 client: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 60 server: Check current flapping back on peer's RnextKey request: passed counters checks > ok 61 client: Check current flapping back on peer's RnextKey request: passed counters checks > ok 62 server: Rotate over all different keys: The socket keys are consistent with the expectations > ok 63 server: Rotate over all different keys: server alive > ok 64 server: Rotate over all different keys: passed counters checks > ok 65 server: Rotate over all different keys: current key 128 as expected > ok 66 client: Rotate over all different keys: rnext key 128 as expected > ok 67 server: Rotate over all different keys: current key 129 as expected > ok 68 client: Rotate over all different keys: rnext key 129 as expected > ok 69 server: Rotate over all different keys: current key 130 as expected > ok 70 client: Rotate over all different keys: rnext key 130 as expected > ok 71 server: Rotate over all different keys: current key 131 as expected > ok 72 client: Rotate over all different keys: rnext key 131 as expected > ok 73 server: Rotate over all different keys: current key 132 as expected > ok 74 client: Rotate over all different keys: rnext key 132 as expected > ok 75 server: Rotate over all different keys: current key 133 as expected > ok 76 client: Rotate over all different keys: rnext key 133 as expected > ok 77 server: Rotate over all different keys: current key 134 as expected > ok 78 client: Rotate over all different keys: rnext key 134 as expected > ok 79 server: Rotate over all different keys: current key 135 as expected > ok 80 client: Rotate over all different keys: rnext key 135 as expected > ok 81 server: Rotate over all different keys: current key 136 as expected > ok 82 client: Rotate over all different keys: rnext key 136 as expected > ok 83 server: Rotate over all different keys: current key 137 as expected > ok 84 client: Rotate over all different keys: rnext key 137 as expected > ok 85 server: Rotate over all different keys: current key 138 as expected > ok 86 client: Rotate over all different keys: rnext key 138 as expected > ok 87 server: Rotate over all different keys: current key 139 as expected > ok 88 client: Rotate over all different keys: rnext key 139 as expected > ok 89 server: Rotate over all different keys: current key 140 as expected > ok 90 client: Rotate over all different keys: rnext key 140 as expected > ok 91 server: Rotate over all different keys: current key 141 as expected > ok 92 client: Rotate over all different keys: rnext key 141 as expected > ok 93 server: Rotate over all different keys: current key 142 as expected > ok 94 client: Rotate over all different keys: rnext key 142 as expected > ok 95 server: Rotate over all different keys: current key 143 as expected > ok 96 client: Rotate over all different keys: rnext key 143 as expected > ok 97 server: Rotate over all different keys: current key 144 as expected > ok 98 client: Rotate over all different keys: rnext key 144 as expected > ok 99 server: Rotate over all different keys: current key 145 as expected > ok 100 client: Rotate over all different keys: rnext key 145 as expected > ok 101 server: Rotate over all different keys: current key 146 as expected > ok 102 client: Rotate over all different keys: rnext key 146 as expected > ok 103 server: Rotate over all different keys: current key 127 as expected > ok 104 client: Rotate over all different keys: rnext key 127 as expected > ok 105 client: Rotate over all different keys: current key 0 as expected > ok 106 client: Rotate over all different keys: rnext key 127 as expected > ok 107 server: Rotate over all different keys: The socket keys are consistent with the expectations > ok 108 client: Rotate over all different keys: The socket keys are consistent with the expectations > ok 109 client: Rotate over all different keys: passed counters checks > ok 110 server: Rotate over all different keys: passed counters checks > ok 111 server: Check accept() => established key matching: The socket keys are consistent with the expectations > ok 112 Can't add a key with non-matching ip-address for established sk > ok 113 Can't add a key with non-matching VRF for established sk > ok 114 server: Check accept() => established key matching: server alive > ok 115 server: Check accept() => established key matching: passed counters checks > ok 116 client: Check connect() => established key matching: current key 0 as expected > ok 117 client: Check connect() => established key matching: rnext key 128 as expected > ok 118 client: Check connect() => established key matching: The socket keys are consistent with the expectations > ok 119 server: Check accept() => established key matching: The socket keys are consistent with the expectations > ok 120 server: Check accept() => established key matching: passed counters checks > # Totals: pass:120 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + .../testing/selftests/net/tcp_ao/key-management.c | 1180 ++++++++++++++++++++ 2 files changed, 1181 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/key-management.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 9286f9b99c86..6343cfcf919b 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := bench-lookups TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += key-management TEST_BOTH_AF += restore TEST_BOTH_AF += rst TEST_BOTH_AF += self-connect diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c new file mode 100644 index 000000000000..c48b4970ca17 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -0,0 +1,1180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +union tcp_addr wrong_addr; +#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare" +#define fault(type) (inj == FAULT_ ## type) + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + + +static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("test_add_key()"); + + if (addr && test_add_key(sk, SECOND_PASSWORD, *addr, + DEFAULT_TEST_PREFIX, sndid, rcvid)) + test_error("test_add_key()"); + + return sk; +} + +static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = prepare_sk(addr, sndid, rcvid); + + if (listen(sk, 10)) + test_error("listen()"); + + return sk; +} + +static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async, + int current_key, int rnext_key) +{ + struct tcp_ao_info_opt ao_info = {}; + struct tcp_ao_getsockopt key = {}; + struct tcp_ao_del del = {}; + sockaddr_af sockaddr; + int err; + + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0); + del.prefix = DEFAULT_TEST_PREFIX; + del.sndid = sndid; + del.rcvid = rcvid; + + if (current_key >= 0) { + del.set_current = 1; + del.current_key = (uint8_t)current_key; + } + if (rnext_key >= 0) { + del.set_rnext = 1; + del.rnext = (uint8_t)rnext_key; + } + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del)); + if (err < 0) + return -errno; + + if (async) + return 0; + + tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0); + err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr), + DEFAULT_TEST_PREFIX, sndid, rcvid); + if (!err) + return -EEXIST; + if (err != -E2BIG) + test_error("getsockopt()"); + if (current_key < 0 && rnext_key < 0) + return 0; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key) + return -ENOTRECOVERABLE; + if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid, + bool async, int current_key, int rnext_key, + fault_t inj) +{ + int err; + + err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key); + if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) { + test_ok("%s: key deletion was prevented", tst_name); + return; + } + if (err && fault(FIXME)) { + test_xfail("%s: failed to delete the key %u:%u %d", + tst_name, sndid, rcvid, err); + return; + } + if (!err) { + if (fault(BUSY) || fault(CURRNEXT)) { + test_fail("%s: the key was deleted %u:%u %d", tst_name, + sndid, rcvid, err); + } else { + test_ok("%s: the key was deleted", tst_name); + } + return; + } + test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err); +} + +static int test_set_key(int sk, int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + if (current_keyid >= 0) { + ao_info.set_current = 1; + ao_info.current_key = (uint8_t)current_keyid; + } + if (rnext_keyid >= 0) { + ao_info.set_rnext = 1; + ao_info.rnext = (uint8_t)rnext_keyid; + } + + err = test_set_ao_info(sk, &ao_info); + if (err) + return err; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid) + return -ENOTRECOVERABLE; + if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid) + return -ENOTRECOVERABLE; + return 0; +} + +static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, + set_current, set_rnext, + prefix, 0, sndid, rcvid, 0, keyflags, + strlen(key), key); + if (err) + return err; + + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (err) + return err; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (set_current && ao_info.current_key != sndid) + return -ENOTRECOVERABLE; + if (set_rnext && ao_info.rnext != rcvid) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_add_current_rnext_key(char *tst_name, int sk, const char *key, + uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid, fault_t inj) +{ + int err; + + err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (!err && !fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + if (err == -EINVAL && fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + test_fail("%s", tst_name); +} + +static void check_closed_socket(void) +{ + int sk; + + sk = prepare_sk(&this_ip_dest, 200, 200); + try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY); + try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0); + try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0); + try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0); + try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_add_current_rnext_key("closed socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, 0); + try_add_current_rnext_key("closed socket, add + change rnext key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, 0); + close(sk); +} + +static void assert_no_current_rnext(const char *tst_msg, int sk) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (ao_info.set_current || ao_info.set_rnext) { + test_xfail("%s: the socket has current/rnext keys: %d:%d", + tst_msg, + (ao_info.set_current) ? ao_info.current_key : -1, + (ao_info.set_rnext) ? ao_info.rnext : -1); + } else { + test_ok("%s: the socket has no current/rnext keys", tst_msg); + } +} + +static void assert_no_tcp_repair(void) +{ + struct tcp_ao_repair ao_img = {}; + socklen_t len = sizeof(ao_img); + int sk, err; + + sk = prepare_sk(&this_ip_dest, 200, 200); + test_enable_repair(sk); + if (listen(sk, 10)) + test_error("listen()"); + errno = 0; + err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len); + if (err && errno == EPERM) + test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works"); + errno = 0; + err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img)); + if (err && errno == EPERM) + test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works"); + close(sk); +} + +static void check_listen_socket(void) +{ + int sk, err; + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + err = test_set_key(sk, 100, -1); + if (err == -EINVAL) + test_ok("listen socket, setting current key not allowed"); + else + test_fail("listen socket, set current key"); + err = test_set_key(sk, -1, 200); + if (err == -EINVAL) + test_ok("listen socket, setting rnext key not allowed"); + else + test_fail("listen socket, set rnext key"); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + if (listen(sk, 10)) + test_error("listen()"); + assert_no_current_rnext("listen() after current/rnext keys set", sk); + try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME); + try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME); + close(sk); + + assert_no_tcp_repair(); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("listen socket, delete a key + set current/rnext", sk, + 100, 100, 0, 10, 13, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete current key", sk, + 10, 11, 0, 200, -1, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete rnext key", sk, + 12, 13, 0, -1, 200, FAULT_CURRNEXT); + try_delete_key("listen socket, delete a key", sk, + 200, 200, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_add_current_rnext_key("listen socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, FAULT_CURRNEXT); + try_add_current_rnext_key("listen socket, add + change rnext key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, FAULT_CURRNEXT); + close(sk); +} + +static const char *fips_fpath = "/proc/sys/crypto/fips_enabled"; +static bool is_fips_enabled(void) +{ + static int fips_checked = -1; + FILE *fenabled; + int enabled; + + if (fips_checked >= 0) + return !!fips_checked; + if (access(fips_fpath, R_OK)) { + if (errno != ENOENT) + test_error("Can't open %s", fips_fpath); + fips_checked = 0; + return false; + } + fenabled = fopen(fips_fpath, "r"); + if (!fenabled) + test_error("Can't open %s", fips_fpath); + if (fscanf(fenabled, "%d", &enabled) != 1) + test_error("Can't read from %s", fips_fpath); + fclose(fenabled); + fips_checked = !!enabled; + return !!fips_checked; +} + +struct test_key { + char password[TCP_AO_MAXKEYLEN]; + const char *alg; + unsigned int len; + uint8_t client_keyid; + uint8_t server_keyid; + uint8_t maclen; + uint8_t matches_client : 1, + matches_server : 1, + matches_vrf : 1, + is_current : 1, + is_rnext : 1, + used_on_handshake : 1, + used_after_accept : 1, + used_on_client : 1; +}; + +struct key_collection { + unsigned int nr_keys; + struct test_key *keys; +}; + +static struct key_collection collection; + +#define TEST_MAX_MACLEN 16 +const char *test_algos[] = { + "cmac(aes128)", + "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)", + "hmac(sha224)", "hmac(sha3-512)", + /* only if !CONFIG_FIPS */ +#define TEST_NON_FIPS_ALGOS 2 + "hmac(rmd160)", "hmac(md5)" +}; +const unsigned int test_maclens[] = { 1, 4, 12, 16 }; +#define MACLEN_SHIFT 2 +#define ALGOS_SHIFT 4 + +static unsigned int make_mask(unsigned int shift, unsigned int prev_shift) +{ + unsigned int ret = BIT(shift) - 1; + + return ret << prev_shift; +} + +static void init_key_in_collection(unsigned int index, bool randomized) +{ + struct test_key *key = &collection.keys[index]; + unsigned int algos_nr, algos_index; + + /* Same for randomized and non-randomized test flows */ + key->client_keyid = index; + key->server_keyid = 127 + index; + key->matches_client = 1; + key->matches_server = 1; + key->matches_vrf = 1; + /* not really even random, but good enough for a test */ + key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN); + key->len += TEST_TCP_AO_MINKEYLEN; + randomize_buffer(key->password, key->len); + + if (randomized) { + key->maclen = (rand() % TEST_MAX_MACLEN) + 1; + algos_index = rand(); + } else { + unsigned int shift = MACLEN_SHIFT; + + key->maclen = test_maclens[index & make_mask(shift, 0)]; + algos_index = index & make_mask(ALGOS_SHIFT, shift); + } + algos_nr = ARRAY_SIZE(test_algos); + if (is_fips_enabled()) + algos_nr -= TEST_NON_FIPS_ALGOS; + key->alg = test_algos[algos_index % algos_nr]; +} + +static int init_default_key_collection(unsigned int nr_keys, bool randomized) +{ + size_t key_sz = sizeof(collection.keys[0]); + + if (!nr_keys) { + free(collection.keys); + collection.keys = NULL; + return 0; + } + + /* + * All keys have uniq sndid/rcvid and sndid != rcvid in order to + * check for any bugs/issues for different keyids, visible to both + * peers. Keyid == 254 is unused. + */ + if (nr_keys > 127) + test_error("Test requires too many keys, correct the source"); + + collection.keys = reallocarray(collection.keys, nr_keys, key_sz); + if (!collection.keys) + return -ENOMEM; + + memset(collection.keys, 0, nr_keys * key_sz); + collection.nr_keys = nr_keys; + while (nr_keys--) + init_key_in_collection(nr_keys, randomized); + + return 0; +} + +static void test_key_error(const char *msg, struct test_key *key) +{ + test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}", + msg, key->alg, key->client_keyid, key->server_keyid, + key->maclen, key->matches_client, key->matches_server, + key->matches_vrf, key->is_current, key->is_rnext, key->len); +} + +static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len, + union tcp_addr addr, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, + uint8_t maclen, const char *alg, + bool set_current, bool set_rnext) +{ + struct tcp_ao_add tmp = {}; + uint8_t keyflags = 0; + int err; + + if (!alg) + alg = DEFAULT_TEST_ALGO; + + if (vrf) + keyflags |= TCP_AO_KEYF_IFINDEX; + err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext, + DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen, + keyflags, pwd_len, pwd); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void verify_current_rnext(const char *tst, int sk, + int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (current_keyid >= 0) { + if (!ao_info.set_current) + test_fail("%s: the socket doesn't have current key", tst); + else if (ao_info.current_key != current_keyid) + test_fail("%s: current key is not the expected one %d != %u", + tst, current_keyid, ao_info.current_key); + else + test_ok("%s: current key %u as expected", + tst, ao_info.current_key); + } + if (rnext_keyid >= 0) { + if (!ao_info.set_rnext) + test_fail("%s: the socket doesn't have rnext key", tst); + else if (ao_info.rnext != rnext_keyid) + test_fail("%s: rnext key is not the expected one %d != %u", + tst, rnext_keyid, ao_info.rnext); + else + test_ok("%s: rnext key %u as expected", tst, ao_info.rnext); + } +} + + +static int key_collection_socket(bool server, unsigned int port) +{ + unsigned int i; + int sk; + + if (server) + sk = test_listen_socket(this_ip_addr, port, 1); + else + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + union tcp_addr *addr = &wrong_addr; + uint8_t sndid, rcvid, vrf; + bool set_current = false, set_rnext = false; + + if (key->matches_vrf) + vrf = 0; + else + vrf = test_vrf_ifindex; + if (server) { + if (key->matches_client) + addr = &this_ip_dest; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + addr = &this_ip_dest; + sndid = key->client_keyid; + rcvid = key->server_keyid; + set_current = key->is_current; + set_rnext = key->is_rnext; + } + + if (test_add_key_cr(sk, key->password, key->len, + *addr, vrf, sndid, rcvid, key->maclen, + key->alg, set_current, set_rnext)) + test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); + if (set_current || set_rnext) + key->used_on_handshake = 1; +#ifdef DEBUG + test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", + server ? "server" : "client", i, collection.nr_keys, + key->alg, rcvid, sndid, key->maclen, + key->matches_client, key->matches_server, + key->is_current, key->is_rnext, key->len); +#endif + } + return sk; +} + +static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, + struct tcp_ao_counters *a, struct tcp_ao_counters *b) +{ + unsigned int i; + + __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + uint8_t sndid, rcvid; + bool was_used; + + if (server) { + sndid = key->server_keyid; + rcvid = key->client_keyid; + if (is_listen_sk) + was_used = key->used_on_handshake; + else + was_used = key->used_after_accept; + } else { + sndid = key->client_keyid; + rcvid = key->server_keyid; + was_used = key->used_on_client; + } + + test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + sndid, rcvid); + } + test_tcp_ao_counters_free(a); + test_tcp_ao_counters_free(b); + test_ok("%s: passed counters checks", tst_name); +} + +static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf, + size_t len, int sndid, int rcvid) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (sndid >= 0 && buf[i].sndid != sndid) + continue; + if (rcvid >= 0 && buf[i].rcvid != rcvid) + continue; + return &buf[i]; + } + return NULL; +} + +static void verify_keys(const char *tst_name, int sk, + bool is_listen_sk, bool server) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + struct tcp_ao_getsockopt *keys; + bool passed_test = true; + unsigned int i; + + keys = calloc(collection.nr_keys, len); + if (!keys) + test_error("calloc()"); + + keys->nkeys = collection.nr_keys; + keys->get_all = 1; + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) { + free(keys); + test_error("getsockopt(TCP_AO_GET_KEYS)"); + } + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + struct tcp_ao_getsockopt *dump_key; + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + uint8_t sndid, rcvid; + bool matches = false; + + if (server) { + if (key->matches_client) + matches = true; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + matches = true; + sndid = key->client_keyid; + rcvid = key->server_keyid; + } + if (!key->matches_vrf) + matches = false; + /* no keys get removed on the original listener socket */ + if (is_listen_sk) + matches = true; + + dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid); + if (matches != !!dump_key) { + test_fail("%s: key %u:%u %s%s on the socket", + tst_name, sndid, rcvid, + key->matches_vrf ? "" : "[vrf] ", + matches ? "disappeared" : "yet present"); + passed_test = false; + goto out; + } + if (!dump_key) + continue; + + if (!strcmp("cmac(aes128)", key->alg)) { + is_kdf_aes_128_cmac = (key->len != 16); + is_cmac_aes = true; + } + + if (is_cmac_aes) { + if (strcmp(dump_key->alg_name, "cmac(aes)")) { + test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s", + tst_name, sndid, rcvid, + dump_key->alg_name); + passed_test = false; + continue; + } + } else if (strcmp(dump_key->alg_name, key->alg)) { + test_fail("%s: key %u:%u has unexpected alg %s != %s", + tst_name, sndid, rcvid, + dump_key->alg_name, key->alg); + passed_test = false; + continue; + } + if (is_kdf_aes_128_cmac) { + if (dump_key->keylen != 16) { + test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u", + tst_name, sndid, rcvid, + dump_key->keylen); + continue; + } + } else if (dump_key->keylen != key->len) { + test_fail("%s: key %u:%u changed password len %u != %u", + tst_name, sndid, rcvid, + dump_key->keylen, key->len); + passed_test = false; + continue; + } + if (!is_kdf_aes_128_cmac && + memcmp(dump_key->key, key->password, key->len)) { + test_fail("%s: key %u:%u has different password", + tst_name, sndid, rcvid); + passed_test = false; + continue; + } + if (dump_key->maclen != key->maclen) { + test_fail("%s: key %u:%u changed maclen %u != %u", + tst_name, sndid, rcvid, + dump_key->maclen, key->maclen); + passed_test = false; + continue; + } + } + + if (passed_test) + test_ok("%s: The socket keys are consistent with the expectations", + tst_name); +out: + free(keys); +} + +static int start_server(const char *tst_name, unsigned int port, size_t quota, + struct tcp_ao_counters *begin, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters lsk_c1, lsk_c2; + ssize_t bytes; + int sk, lsk; + + synchronize_threads(); /* 1: key collection initialized */ + lsk = key_collection_socket(true, port); + if (test_get_tcp_ao_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + if (test_get_tcp_ao_counters(sk, begin)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: accepted => send data */ + if (test_get_tcp_ao_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, lsk, true, true); + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + + verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2); + + return sk; +} + +static void end_server(const char *tst_name, int sk, + struct tcp_ao_counters *begin) +{ + struct tcp_ao_counters end; + + if (test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, true); + + synchronize_threads(); /* 4: verified => closed */ + close(sk); + + verify_counters(tst_name, true, false, begin, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_server_run(const char *tst_name, unsigned int port, size_t quota, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + end_server(tst_name, sk, &tmp); +} + +static void server_rotations(const char *tst_name, unsigned int port, + size_t quota, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + + for (i = current_index + 1; rotations > 0; i++, rotations--) { + ssize_t bytes; + + if (i >= collection.nr_keys) + i = 0; + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + return; + } + verify_current_rnext(tst_name, sk, + collection.keys[i].server_keyid, -1); + synchronize_threads(); /* verify current/rnext */ + } + end_server(tst_name, sk, &tmp); +} + +static int run_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + int sk; + + synchronize_threads(); /* 1: key collection initialized */ + sk = key_collection_socket(false, port); + + if (current_index >= 0 || rnext_index >= 0) { + int sndid = -1, rcvid = -1; + + if (current_index >= 0) + sndid = collection.keys[current_index].client_keyid; + if (rnext_index >= 0) + rcvid = collection.keys[rnext_index].server_keyid; + if (test_set_key(sk, sndid, rcvid)) + test_error("failed to set current/rnext keys"); + } + if (before && test_get_tcp_ao_counters(sk, before)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port++) <= 0) + test_error("failed to connect()"); + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + collection.keys[current_index].used_on_handshake = 1; + collection.keys[rnext_index].used_after_accept = 1; + collection.keys[rnext_index].used_on_client = 1; + + synchronize_threads(); /* 3: accepted => send data */ + if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + if (before) + test_tcp_ao_counters_free(before); + return -1; + } + + return sk; +} + +static int start_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, before, msg_sz, msg_nr); +} + +static void end_client(const char *tst_name, int sk, unsigned int nr_keys, + int current_index, int rnext_index, + struct tcp_ao_counters *start) +{ + struct tcp_ao_counters end; + + /* Some application may become dependent on this kernel choice */ + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + verify_current_rnext(tst_name, sk, + collection.keys[current_index].client_keyid, + collection.keys[rnext_index].server_keyid); + if (start && test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, false); + synchronize_threads(); /* 4: verify => closed */ + close(sk); + if (start) + verify_counters(tst_name, false, false, start, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_unmatched_keys(int sk, int *rnext_index) +{ + struct test_key *key; + unsigned int i = 0; + int err; + + do { + key = &collection.keys[i]; + if (!key->matches_server) + break; + } while (++i < collection.nr_keys); + if (key->matches_server) + test_error("all keys on client match the server"); + + err = test_add_key_cr(sk, key->password, key->len, wrong_addr, + 0, key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching ip-address for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching ip-address for established sk"); + else + test_error("Failed to add a key"); + + err = test_add_key_cr(sk, key->password, key->len, this_ip_dest, + test_vrf_ifindex, + key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching VRF for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching VRF for established sk"); + else + test_error("Failed to add a key"); + + for (i = 0; i < collection.nr_keys; i++) { + key = &collection.keys[i]; + if (!key->matches_client) + break; + } + if (key->matches_client) + test_error("all keys on server match the client"); + if (test_set_key(sk, -1, key->server_keyid)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + *rnext_index = i; +} + +static int client_non_matching(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index, + const size_t msg_sz, const size_t msg_nr) +{ + unsigned int i; + + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + for (i = 0; i < nr_keys; i++) { + /* key (0, 0) matches */ + collection.keys[i].matches_client = !!((i + 3) % 4); + collection.keys[i].matches_server = !!((i + 2) % 4); + if (kernel_config_has(KCONFIG_NET_VRF)) + collection.keys[i].matches_vrf = !!((i + 1) % 4); + } + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, NULL, msg_sz, msg_nr); +} + +static void check_current_back(const char *tst_name, unsigned int port, + unsigned int nr_keys, + unsigned int current_index, unsigned int rnext_index, + unsigned int rotate_to_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + collection.keys[rotate_to_index].used_after_accept = 1; + + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void roll_over_keys(const char *tst_name, unsigned int port, + unsigned int nr_keys, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + for (i = rnext_index + 1; rotations > 0; i++, rotations--) { + if (i >= collection.nr_keys) + i = 0; + if (test_set_key(sk, -1, collection.keys[i].server_keyid)) + test_error("Can't change the Rnext key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + test_tcp_ao_counters_free(&tmp); + return; + } + verify_current_rnext(tst_name, sk, -1, + collection.keys[i].server_keyid); + collection.keys[i].used_on_client = 1; + synchronize_threads(); /* verify current/rnext */ + } + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_run(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_match(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index) +{ + int sk; + + sk = client_non_matching(tst_name, port, nr_keys, current_index, + rnext_index, msg_len, nr_packets); + if (sk < 0) + return; + try_unmatched_keys(sk, &rnext_index); + end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_server_run("server: Check current/rnext keys unset before connect()", + port++, quota, 19, 19); + try_server_run("server: Check current/rnext keys set before connect()", + port++, quota, 10, 10); + try_server_run("server: Check current != rnext keys set before connect()", + port++, quota, 5, 10); + try_server_run("server: Check current flapping back on peer's RnextKey request", + port++, quota * 2, 5, 10); + server_rotations("server: Rotate over all different keys", port++, + quota, 20, 0, 0); + try_server_run("server: Check accept() => established key matching", + port++, quota * 2, 0, 0); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void check_established_socket(void) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_client_run("client: Check current/rnext keys unset before connect()", + port++, 20, -1, -1); + try_client_run("client: Check current/rnext keys set before connect()", + port++, 20, 10, 10); + try_client_run("client: Check current != rnext keys set before connect()", + port++, 20, 10, 5); + check_current_back("client: Check current flapping back on peer's RnextKey request", + port++, 20, 10, 5, 2); + roll_over_keys("client: Rotate over all different keys", port++, + 20, 20, 0, 0); + try_client_match("client: Check connect() => established key matching", + port++, 20, 0, 0); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + check_closed_socket(); + check_listen_socket(); + check_established_socket(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, server_fn, client_fn); + return 0; +} -- cgit v1.2.3 From c8f021eec5817601dbd25ab7e3ad5c720965c688 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Dec 2023 17:04:24 +0100 Subject: selftests: mptcp: join: fix subflow_send_ack lookup MPC backups tests will skip unexpected sometimes (For example, when compiling kernel with an older version of gcc, such as gcc-8), since static functions like mptcp_subflow_send_ack also be listed in /proc/kallsyms, with a 't' in front of it, not 'T' ('T' is for a global function): > grep "mptcp_subflow_send_ack" /proc/kallsyms 0000000000000000 T __pfx___mptcp_subflow_send_ack 0000000000000000 T __mptcp_subflow_send_ack 0000000000000000 t __pfx_mptcp_subflow_send_ack 0000000000000000 t mptcp_subflow_send_ack In this case, mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$" will be false, MPC backups tests will skip. This is not what we expected. The correct logic here should be: if mptcp_subflow_send_ack is not a global function in /proc/kallsyms, do these MPC backups tests. So a 'T' must be added in front of mptcp_subflow_send_ack. Fixes: 632978f0a961 ("selftests: mptcp: join: skip MPC backups tests if not supported") Cc: stable@vger.kernel.org Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3c94f2f194d6..24a57b3ae215 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2776,7 +2776,7 @@ backup_tests() fi if reset "mpc backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2785,7 +2785,7 @@ backup_tests() fi if reset "mpc backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup speed=slow \ @@ -2795,7 +2795,7 @@ backup_tests() fi if reset "mpc switch to backup" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2804,7 +2804,7 @@ backup_tests() fi if reset "mpc switch to backup both sides" && - continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow sflags=backup speed=slow \ -- cgit v1.2.3 From 13d605e32e4cfdedcecdf3d98d21710ffe887708 Mon Sep 17 00:00:00 2001 From: Ghanshyam Agrawal Date: Sun, 17 Dec 2023 13:30:19 +0530 Subject: kselftest: alsa: fixed a print formatting warning A statement used %d print formatter where %s should have been used. The same has been fixed in this commit. Signed-off-by: Ghanshyam Agrawal Link: 5aaf9efffc57 ("kselftest: alsa: Add simplistic test for ALSA mixer controls kselftest") Link: https://lore.kernel.org/r/20231217080019.1063476-1-ghanshyam1898@gmail.com Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/mixer-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 21e482b23f50..23df154fcdd7 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -138,7 +138,7 @@ static void find_controls(void) err = snd_ctl_elem_info(card_data->handle, ctl_data->info); if (err < 0) { - ksft_print_msg("%s getting info for %d\n", + ksft_print_msg("%s getting info for %s\n", snd_strerror(err), ctl_data->name); } -- cgit v1.2.3 From 0c970ed2f87c058fe3ddeb4d7d8f64f72cf41d7a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 15 Dec 2023 16:45:49 -0800 Subject: s390/bpf: Fix indirect trampoline generation The func_addr used to be NULL for indirect trampolines used by struct_ops. Now func_addr is a valid function pointer. Hence use BPF_TRAMP_F_INDIRECT flag to detect such condition. Fixes: 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20231216004549.78355-1-alexei.starovoitov@gmail.com --- arch/s390/net/bpf_jit_comp.c | 3 ++- tools/testing/selftests/bpf/DENYLIST.s390x | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index cc129617480a..7f0a7b97ef4c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2362,7 +2362,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, return -ENOTSUPP; /* Return to %r14, since func_addr and %r0 are not available. */ - if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) + if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) || + (flags & BPF_TRAMP_F_INDIRECT)) flags |= BPF_TRAMP_F_SKIP_FRAME; /* diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index d27aa42d11a4..1a63996c0304 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,7 +1,5 @@ # TEMPORARY # Alphabetical order -dummy_st_ops/dummy_init_ret_value -dummy_st_ops/dummy_init_ptr_arg exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -- cgit v1.2.3 From 1e73427f66353b7fe21c138787ff2b711ca1c0dd Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 13 Dec 2023 18:31:16 +0100 Subject: tools/counter: add a flexible watch events tool This adds a new counter tool to be able to test various watch events. A flexible watch array can be populated from command line, each field may be tuned with a dedicated command line sub-option in "--watch" string. Several watch events can be defined, each can have specific watch options, by using "--watch --watch ". Watch options is a comma separated list. It also comes with a simple default watch (to monitor overflow/underflow events), used when no watch parameters are provided. It's equivalent to: counter_watch_events -w comp_count,scope_count,evt_ovf_udf The print_usage() routine proposes another example, from the command line, which generates a 2 elements watch array, to monitor: - overflow underflow events - capture events, on channel 3, that reads read captured data by specifying the component id (capture3_component_id being 7 here). Signed-off-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20231213173117.4174511-2-fabrice.gasnier@foss.st.com Signed-off-by: William Breathitt Gray --- tools/counter/Build | 1 + tools/counter/Makefile | 12 +- tools/counter/counter_watch_events.c | 406 +++++++++++++++++++++++++++++++++++ 3 files changed, 417 insertions(+), 2 deletions(-) create mode 100644 tools/counter/counter_watch_events.c (limited to 'tools') diff --git a/tools/counter/Build b/tools/counter/Build index 33f4a51d715e..4bbadb7ec93a 100644 --- a/tools/counter/Build +++ b/tools/counter/Build @@ -1 +1,2 @@ counter_example-y += counter_example.o +counter_watch_events-y += counter_watch_events.o diff --git a/tools/counter/Makefile b/tools/counter/Makefile index b2c2946f44c9..d82d35a520f6 100644 --- a/tools/counter/Makefile +++ b/tools/counter/Makefile @@ -12,9 +12,10 @@ endif # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r -override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include \ + -I$(srctree)/tools/include -ALL_TARGETS := counter_example +ALL_TARGETS := counter_example counter_watch_events ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) all: $(ALL_PROGRAMS) @@ -37,12 +38,19 @@ $(COUNTER_EXAMPLE): prepare FORCE $(OUTPUT)counter_example: $(COUNTER_EXAMPLE) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +COUNTER_WATCH_EVENTS := $(OUTPUT)counter_watch_events.o +$(COUNTER_WATCH_EVENTS): prepare FORCE + $(Q)$(MAKE) $(build)=counter_watch_events +$(OUTPUT)counter_watch_events: $(COUNTER_WATCH_EVENTS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + clean: rm -f $(ALL_PROGRAMS) rm -rf $(OUTPUT)include/linux/counter.h rm -df $(OUTPUT)include/linux rm -df $(OUTPUT)include find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '\.*.o.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(bindir); \ diff --git a/tools/counter/counter_watch_events.c b/tools/counter/counter_watch_events.c new file mode 100644 index 000000000000..3898fe7e35ec --- /dev/null +++ b/tools/counter/counter_watch_events.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Counter Watch Events - Test various counter watch events in a userspace application + * + * Copyright (C) STMicroelectronics 2023 - All Rights Reserved + * Author: Fabrice Gasnier . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct counter_watch simple_watch[] = { + { + /* Component data: Count 0 count */ + .component.type = COUNTER_COMPONENT_COUNT, + .component.scope = COUNTER_SCOPE_COUNT, + .component.parent = 0, + /* Event type: overflow or underflow */ + .event = COUNTER_EVENT_OVERFLOW_UNDERFLOW, + /* Device event channel 0 */ + .channel = 0, + }, +}; + +static const char * const counter_event_type_name[] = { + "COUNTER_EVENT_OVERFLOW", + "COUNTER_EVENT_UNDERFLOW", + "COUNTER_EVENT_OVERFLOW_UNDERFLOW", + "COUNTER_EVENT_THRESHOLD", + "COUNTER_EVENT_INDEX", + "COUNTER_EVENT_CHANGE_OF_STATE", + "COUNTER_EVENT_CAPTURE", +}; + +static const char * const counter_component_type_name[] = { + "COUNTER_COMPONENT_NONE", + "COUNTER_COMPONENT_SIGNAL", + "COUNTER_COMPONENT_COUNT", + "COUNTER_COMPONENT_FUNCTION", + "COUNTER_COMPONENT_SYNAPSE_ACTION", + "COUNTER_COMPONENT_EXTENSION", +}; + +static const char * const counter_scope_name[] = { + "COUNTER_SCOPE_DEVICE", + "COUNTER_SCOPE_SIGNAL", + "COUNTER_SCOPE_COUNT", +}; + +static void print_watch(struct counter_watch *watch, int nwatch) +{ + int i; + + /* prints the watch array in C-like structure */ + printf("watch[%d] = {\n", nwatch); + for (i = 0; i < nwatch; i++) { + printf(" [%d] =\t{\n" + "\t\t.component.type = %s\n" + "\t\t.component.scope = %s\n" + "\t\t.component.parent = %d\n" + "\t\t.component.id = %d\n" + "\t\t.event = %s\n" + "\t\t.channel = %d\n" + "\t},\n", + i, + counter_component_type_name[watch[i].component.type], + counter_scope_name[watch[i].component.scope], + watch[i].component.parent, + watch[i].component.id, + counter_event_type_name[watch[i].event], + watch[i].channel); + } + printf("};\n"); +} + +static void print_usage(void) +{ + fprintf(stderr, "Usage:\n\n" + "counter_watch_events [options] [-w ]\n" + "counter_watch_events [options] [-w ] [-w ]...\n" + "\n" + "When no --watch option has been provided, simple watch example is used:\n" + "counter_watch_events [options] -w comp_count,scope_count,evt_ovf_udf\n" + "\n" + "Test various watch events for given counter device.\n" + "\n" + "Options:\n" + " -d, --debug Prints debug information\n" + " -h, --help Prints usage\n" + " -n, --device-num Use /dev/counter [default: /dev/counter0]\n" + " -l, --loop Loop for events [default: 0 (forever)]\n" + " -w, --watch comma-separated list of watch options\n" + "\n" + "Watch options:\n" + " scope_device (COUNTER_SCOPE_DEVICE) [default: scope_device]\n" + " scope_signal (COUNTER_SCOPE_SIGNAL)\n" + " scope_count (COUNTER_SCOPE_COUNT)\n" + "\n" + " comp_none (COUNTER_COMPONENT_NONE) [default: comp_none]\n" + " comp_signal (COUNTER_COMPONENT_SIGNAL)\n" + " comp_count (COUNTER_COMPONENT_COUNT)\n" + " comp_function (COUNTER_COMPONENT_FUNCTION)\n" + " comp_synapse_action (COUNTER_COMPONENT_SYNAPSE_ACTION)\n" + " comp_extension (COUNTER_COMPONENT_EXTENSION)\n" + "\n" + " evt_ovf (COUNTER_EVENT_OVERFLOW) [default: evt_ovf]\n" + " evt_udf (COUNTER_EVENT_UNDERFLOW)\n" + " evt_ovf_udf (COUNTER_EVENT_OVERFLOW_UNDERFLOW)\n" + " evt_threshold (COUNTER_EVENT_THRESHOLD)\n" + " evt_index (COUNTER_EVENT_INDEX)\n" + " evt_change_of_state (COUNTER_EVENT_CHANGE_OF_STATE)\n" + " evt_capture (COUNTER_EVENT_CAPTURE)\n" + "\n" + " chan= channel for this watch [default: 0]\n" + " id= componend id for this watch [default: 0]\n" + " parent= componend parent for this watch [default: 0]\n" + "\n" + "Example with two watched events:\n\n" + "counter_watch_events -d \\\n" + "\t-w comp_count,scope_count,evt_ovf_udf \\\n" + "\t-w comp_extension,scope_count,evt_capture,id=7,chan=3\n" + ); +} + +static const struct option longopts[] = { + { "debug", no_argument, 0, 'd' }, + { "help", no_argument, 0, 'h' }, + { "device-num", required_argument, 0, 'n' }, + { "loop", required_argument, 0, 'l' }, + { "watch", required_argument, 0, 'w' }, + { }, +}; + +/* counter watch subopts */ +enum { + WATCH_SCOPE_DEVICE, + WATCH_SCOPE_SIGNAL, + WATCH_SCOPE_COUNT, + WATCH_COMPONENT_NONE, + WATCH_COMPONENT_SIGNAL, + WATCH_COMPONENT_COUNT, + WATCH_COMPONENT_FUNCTION, + WATCH_COMPONENT_SYNAPSE_ACTION, + WATCH_COMPONENT_EXTENSION, + WATCH_EVENT_OVERFLOW, + WATCH_EVENT_UNDERFLOW, + WATCH_EVENT_OVERFLOW_UNDERFLOW, + WATCH_EVENT_THRESHOLD, + WATCH_EVENT_INDEX, + WATCH_EVENT_CHANGE_OF_STATE, + WATCH_EVENT_CAPTURE, + WATCH_CHANNEL, + WATCH_ID, + WATCH_PARENT, + WATCH_SUBOPTS_MAX, +}; + +static char * const counter_watch_subopts[WATCH_SUBOPTS_MAX + 1] = { + /* component.scope */ + [WATCH_SCOPE_DEVICE] = "scope_device", + [WATCH_SCOPE_SIGNAL] = "scope_signal", + [WATCH_SCOPE_COUNT] = "scope_count", + /* component.type */ + [WATCH_COMPONENT_NONE] = "comp_none", + [WATCH_COMPONENT_SIGNAL] = "comp_signal", + [WATCH_COMPONENT_COUNT] = "comp_count", + [WATCH_COMPONENT_FUNCTION] = "comp_function", + [WATCH_COMPONENT_SYNAPSE_ACTION] = "comp_synapse_action", + [WATCH_COMPONENT_EXTENSION] = "comp_extension", + /* event */ + [WATCH_EVENT_OVERFLOW] = "evt_ovf", + [WATCH_EVENT_UNDERFLOW] = "evt_udf", + [WATCH_EVENT_OVERFLOW_UNDERFLOW] = "evt_ovf_udf", + [WATCH_EVENT_THRESHOLD] = "evt_threshold", + [WATCH_EVENT_INDEX] = "evt_index", + [WATCH_EVENT_CHANGE_OF_STATE] = "evt_change_of_state", + [WATCH_EVENT_CAPTURE] = "evt_capture", + /* channel, id, parent */ + [WATCH_CHANNEL] = "chan", + [WATCH_ID] = "id", + [WATCH_PARENT] = "parent", + /* Empty entry ends the opts array */ + NULL +}; + +int main(int argc, char **argv) +{ + int c, fd, i, ret, rc = 0, debug = 0, loop = 0, dev_num = 0, nwatch = 0; + struct counter_event event_data; + char *device_name = NULL, *subopts, *value; + struct counter_watch *watches; + + /* + * 1st pass: + * - list watch events number to allocate the watch array. + * - parse normal options (other than watch options) + */ + while ((c = getopt_long(argc, argv, "dhn:l:w:", longopts, NULL)) != -1) { + switch (c) { + case 'd': + debug = 1; + break; + case 'h': + print_usage(); + return EXIT_SUCCESS; + case 'n': + dev_num = strtoul(optarg, NULL, 10); + if (errno) { + perror("strtol failed: --device-num \n"); + return EXIT_FAILURE; + } + break; + case 'l': + loop = strtol(optarg, NULL, 10); + if (errno) { + perror("strtol failed: --loop \n"); + return EXIT_FAILURE; + } + break; + case 'w': + nwatch++; + break; + default: + return EXIT_FAILURE; + }; + }; + + if (nwatch) { + watches = calloc(nwatch, sizeof(*watches)); + if (!watches) { + perror("Error allocating watches\n"); + return EXIT_FAILURE; + } + } else { + /* default to simple watch example */ + watches = simple_watch; + nwatch = ARRAY_SIZE(simple_watch); + } + + /* 2nd pass: parse watch sub-options to fill in watch array */ + optind = 1; + i = 0; + while ((c = getopt_long(argc, argv, "dhn:l:w:", longopts, NULL)) != -1) { + switch (c) { + case 'w': + subopts = optarg; + while (*subopts != '\0') { + ret = getsubopt(&subopts, counter_watch_subopts, &value); + switch (ret) { + case WATCH_SCOPE_DEVICE: + case WATCH_SCOPE_SIGNAL: + case WATCH_SCOPE_COUNT: + /* match with counter_scope */ + watches[i].component.scope = ret; + break; + case WATCH_COMPONENT_NONE: + case WATCH_COMPONENT_SIGNAL: + case WATCH_COMPONENT_COUNT: + case WATCH_COMPONENT_FUNCTION: + case WATCH_COMPONENT_SYNAPSE_ACTION: + case WATCH_COMPONENT_EXTENSION: + /* match counter_component_type: subtract enum value */ + ret -= WATCH_COMPONENT_NONE; + watches[i].component.type = ret; + break; + case WATCH_EVENT_OVERFLOW: + case WATCH_EVENT_UNDERFLOW: + case WATCH_EVENT_OVERFLOW_UNDERFLOW: + case WATCH_EVENT_THRESHOLD: + case WATCH_EVENT_INDEX: + case WATCH_EVENT_CHANGE_OF_STATE: + case WATCH_EVENT_CAPTURE: + /* match counter_event_type: subtract enum value */ + ret -= WATCH_EVENT_OVERFLOW; + watches[i].event = ret; + break; + case WATCH_CHANNEL: + if (!value) { + fprintf(stderr, "Invalid chan=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + watches[i].channel = strtoul(value, NULL, 10); + if (errno) { + perror("strtoul failed: chan=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + break; + case WATCH_ID: + if (!value) { + fprintf(stderr, "Invalid id=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + watches[i].component.id = strtoul(value, NULL, 10); + if (errno) { + perror("strtoul failed: id=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + break; + case WATCH_PARENT: + if (!value) { + fprintf(stderr, "Invalid parent=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + watches[i].component.parent = strtoul(value, NULL, 10); + if (errno) { + perror("strtoul failed: parent=\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + break; + default: + fprintf(stderr, "Unknown suboption '%s'\n", value); + rc = EXIT_FAILURE; + goto err_free_watches; + } + } + i++; + break; + } + }; + + if (debug) + print_watch(watches, nwatch); + + ret = asprintf(&device_name, "/dev/counter%d", dev_num); + if (ret < 0) { + fprintf(stderr, "asprintf failed\n"); + rc = EXIT_FAILURE; + goto err_free_watches; + } + + if (debug) + printf("Opening %s\n", device_name); + + fd = open(device_name, O_RDWR); + if (fd == -1) { + fprintf(stderr, "Unable to open %s: %s\n", device_name, strerror(errno)); + free(device_name); + rc = EXIT_FAILURE; + goto err_free_watches; + } + free(device_name); + + for (i = 0; i < nwatch; i++) { + ret = ioctl(fd, COUNTER_ADD_WATCH_IOCTL, watches + i); + if (ret == -1) { + fprintf(stderr, "Error adding watches[%d]: %s\n", i, + strerror(errno)); + rc = EXIT_FAILURE; + goto err_close; + } + } + + ret = ioctl(fd, COUNTER_ENABLE_EVENTS_IOCTL); + if (ret == -1) { + perror("Error enabling events"); + rc = EXIT_FAILURE; + goto err_close; + } + + for (i = 0; loop <= 0 || i < loop; i++) { + ret = read(fd, &event_data, sizeof(event_data)); + if (ret == -1) { + perror("Failed to read event data"); + rc = EXIT_FAILURE; + goto err_close; + } + + if (ret != sizeof(event_data)) { + fprintf(stderr, "Failed to read event data (got: %d)\n", ret); + rc = EXIT_FAILURE; + goto err_close; + } + + printf("Timestamp: %llu\tData: %llu\t event: %s\tch: %d\n", + event_data.timestamp, event_data.value, + counter_event_type_name[event_data.watch.event], + event_data.watch.channel); + + if (event_data.status) { + fprintf(stderr, "Error %d: %s\n", event_data.status, + strerror(event_data.status)); + } + } + +err_close: + close(fd); +err_free_watches: + if (watches != simple_watch) + free(watches); + + return rc; +} -- cgit v1.2.3 From e58aac1a9a179fa9dab3025ef955cdb548c439f2 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 16 Dec 2023 11:55:10 +0800 Subject: selftests/bpf: Test the release of map btf When there is bpf_list_head or bpf_rb_root field in map value, the free of map btf and the free of map value may run concurrently and there may be use-after-free problem, so add two test cases to demonstrate it. And the use-after-free problem can been easily reproduced by using bpf_next tree and a KASAN-enabled kernel. The first test case tests the racing between the free of map btf and the free of array map. It constructs the racing by releasing the array map in the end after other ref-counter of map btf has been released. To delay the free of array map and make it be invoked after btf_free_rcu() is invoked, it stresses system_unbound_wq by closing multiple percpu array maps before it closes the array map. The second case tests the racing between the free of map btf and the free of inner map. Beside using the similar method as the first one does, it uses bpf_map_delete_elem() to delete the inner map and to defer the release of inner map after one RCU grace period. The reason for using two skeletons is to prevent the release of outer map and inner map in map_in_map_btf.c interfering the release of bpf map in normal_map_btf.c. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231216035510.4030605-1-houtao@huaweicloud.com --- tools/testing/selftests/bpf/prog_tests/map_btf.c | 98 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/map_in_map_btf.c | 73 ++++++++++++++++ tools/testing/selftests/bpf/progs/normal_map_btf.c | 56 +++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_btf.c create mode 100644 tools/testing/selftests/bpf/progs/map_in_map_btf.c create mode 100644 tools/testing/selftests/bpf/progs/normal_map_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c new file mode 100644 index 000000000000..2c4ef6037573 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include + +#include "normal_map_btf.skel.h" +#include "map_in_map_btf.skel.h" + +static void do_test_normal_map_btf(void) +{ + struct normal_map_btf *skel; + int i, err, new_fd = -1; + int map_fd_arr[64]; + + skel = normal_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = normal_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Use percpu_array to slow bpf_map_free_deferred() down. + * The memory allocation may fail, so doesn't check the returned fd. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) + map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL); + + /* Close array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.array)); +out: + normal_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the release of the map + * btf which is held by other maps (e.g, bss). After that, array map + * holds the last reference of map btf. + */ + kern_sync_rcu(); + usleep(4000); + /* Spawn multiple kworkers to delay the invocation of + * bpf_map_free_deferred() for array map. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) { + if (map_fd_arr[i] < 0) + continue; + close(map_fd_arr[i]); + } + close(new_fd); +} + +static void do_test_map_in_map_btf(void) +{ + int err, zero = 0, new_fd = -1; + struct map_in_map_btf *skel; + + skel = map_in_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = map_in_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Close inner_array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.inner_array)); + /* Defer the free of inner_array */ + err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0); + ASSERT_OK(err, "delete inner map"); +out: + map_in_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the free of the outer + * map and the release of map btf. After that, inner map holds the last + * reference of map btf. + */ + kern_sync_rcu(); + usleep(10000); + close(new_fd); +} + +void test_map_btf(void) +{ + if (test__start_subtest("array_btf")) + do_test_normal_map_btf(); + if (test__start_subtest("inner_array_btf")) + do_test_map_in_map_btf(); +} diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c new file mode 100644 index 000000000000..7a1336d7b16a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct inner_array_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} inner_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); + __array(values, struct inner_array_type); +} outer_array SEC(".maps") = { + .values = { + [0] = &inner_array, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_inner_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + struct bpf_map *map; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + map = bpf_map_lookup_elem(&outer_array, &zero); + if (!map) + return 0; + + value = bpf_map_lookup_elem(map, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c new file mode 100644 index 000000000000..66cde82aa86d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + value = bpf_map_lookup_elem(&array, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} -- cgit v1.2.3 From f17d1a18a3dd6cc4b38a5226b0acbbad3f2063ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 17 Dec 2023 22:55:38 +0100 Subject: selftests/bpf: Add more uprobe multi fail tests We fail to create uprobe if we pass negative offset. Add more tests validating kernel-side error checking code. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231217215538.3361991-3-jolsa@kernel.org --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 149 ++++++++++++++++++++- 1 file changed, 146 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 07a009f95e85..8269cdee33ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -239,23 +239,166 @@ static void test_attach_api_fails(void) LIBBPF_OPTS(bpf_link_create_opts, opts); const char *path = "/proc/self/exe"; struct uprobe_multi *skel = NULL; + int prog_fd, link_fd = -1; unsigned long offset = 0; - int link_fd = -1; skel = uprobe_multi__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) goto cleanup; + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); + /* abnormal cnt */ opts.uprobe_multi.path = path; opts.uprobe_multi.offsets = &offset; opts.uprobe_multi.cnt = INT_MAX; - link_fd = bpf_link_create(bpf_program__fd(skel->progs.uprobe), 0, - BPF_TRACE_UPROBE_MULTI, &opts); + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); if (!ASSERT_ERR(link_fd, "link_fd")) goto cleanup; if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) goto cleanup; + + /* cnt is 0 */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero")) + goto cleanup; + + /* negative offset */ + offset = -1; + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = (unsigned long *) &offset; + opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative")) + goto cleanup; + + /* offsets is NULL */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null")) + goto cleanup; + + /* wrong offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong")) + goto cleanup; + + /* path is NULL */ + offset = 1; + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null")) + goto cleanup; + + /* wrong path pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = (const char *) 1, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong")) + goto cleanup; + + /* wrong path type */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = "/", + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type")) + goto cleanup; + + /* wrong cookies pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) 1ULL, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong")) + goto cleanup; + + /* wrong ref_ctr_offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) &offset, + .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong")) + goto cleanup; + + /* wrong flags */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.flags = 1 << 31, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags")) + goto cleanup; + + /* wrong pid */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + .uprobe_multi.pid = -2, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong"); + cleanup: if (link_fd >= 0) close(link_fd); -- cgit v1.2.3 From 8ae27bc7fff4ef467a7964821a6cedb34a05d3b2 Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Thu, 7 Dec 2023 21:34:09 +0000 Subject: kunit: tool: fix parsing of test attributes Add parsing of attributes as diagnostic data. Fixes issue with test plan being parsed incorrectly as diagnostic data when located after suite-level attributes. Note that if there does not exist a test plan line, the diagnostic lines between the suite header and the first result will be saved in the suite log rather than the first test case log. Signed-off-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 79d8832c862a..ce34be15c929 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -450,7 +450,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]: Log of diagnostic lines """ log = [] # type: List[str] - non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START] + non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START, TEST_PLAN] while lines and not any(re.match(lines.peek()) for re in non_diagnostic_lines): log.append(lines.pop()) @@ -726,6 +726,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # test plan test.name = "main" ktap_line = parse_ktap_header(lines, test) + test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True else: @@ -737,6 +738,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: if parent_test: # If KTAP version line and/or subtest header is found, attempt # to parse test plan and print test header + test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) print_test_header(test) expected_count = test.expected_count -- cgit v1.2.3 From 6eb0ea28c8e80ead03f08cf8cbdacf08d3073bd6 Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Thu, 7 Dec 2023 21:34:10 +0000 Subject: kunit: tool: add test for parsing attributes Add test for parsing attributes to kunit_tool_test.py. Test checks attributes are parsed and saved in the test logs. This test also checks that the attributes have not interfered with the parsing of other test information, specifically the suite header as the test plan was being incorrectely parsed. Signed-off-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_tool_test.py | 16 ++++++++++++++++ tools/testing/kunit/test_data/test_parse_attributes.log | 9 +++++++++ 2 files changed, 25 insertions(+) create mode 100644 tools/testing/kunit/test_data/test_parse_attributes.log (limited to 'tools') diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b28c1510be2e..2beb7327e53f 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -331,6 +331,22 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.parse_run_tests(file.readlines()) self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) + def test_parse_attributes(self): + ktap_log = test_data_path('test_parse_attributes.log') + with open(ktap_log) as file: + result = kunit_parser.parse_run_tests(file.readlines()) + + # Test should pass with no errors + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=0)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + + # Ensure suite header is parsed correctly + self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) + + # Ensure attributes in correct test log + self.assertContains('# module: example', result.subtests[0].log) + self.assertContains('# test.speed: slow', result.subtests[0].subtests[0].log) + def test_show_test_output_on_failure(self): output = """ KTAP version 1 diff --git a/tools/testing/kunit/test_data/test_parse_attributes.log b/tools/testing/kunit/test_data/test_parse_attributes.log new file mode 100644 index 000000000000..1a13c371fe9d --- /dev/null +++ b/tools/testing/kunit/test_data/test_parse_attributes.log @@ -0,0 +1,9 @@ +KTAP version 1 +1..1 + KTAP version 1 + # Subtest: suite + # module: example + 1..1 + # test.speed: slow + ok 1 test +ok 1 suite \ No newline at end of file -- cgit v1.2.3 From 62691b801daa497e36ad77636c3a6cd0f6dda440 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:08 +0000 Subject: tools/net/ynl: Use consistent array index expression formatting Use expression formatting that conforms to the python style guide. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c56dad9593c6..df7b1547de1f 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -98,12 +98,12 @@ class NlAttr: } def __init__(self, raw, offset): - self._len, self._type = struct.unpack("HH", raw[offset:offset + 4]) + self._len, self._type = struct.unpack("HH", raw[offset : offset + 4]) self.type = self._type & ~Netlink.NLA_TYPE_MASK self.is_nest = self._type & Netlink.NLA_F_NESTED self.payload_len = self._len self.full_len = (self.payload_len + 3) & ~3 - self.raw = raw[offset + 4:offset + self.payload_len] + self.raw = raw[offset + 4 : offset + self.payload_len] @classmethod def get_format(cls, attr_type, byte_order=None): @@ -154,7 +154,7 @@ class NlAttr: for m in members: # TODO: handle non-scalar members if m.type == 'binary': - decoded = self.raw[offset:offset+m['len']] + decoded = self.raw[offset : offset + m['len']] offset += m['len'] elif m.type in NlAttr.type_formats: format = self.get_format(m.type, m.byte_order) @@ -193,12 +193,12 @@ class NlAttrs: class NlMsg: def __init__(self, msg, offset, attr_space=None): - self.hdr = msg[offset:offset + 16] + self.hdr = msg[offset : offset + 16] self.nl_len, self.nl_type, self.nl_flags, self.nl_seq, self.nl_portid = \ struct.unpack("IHHII", self.hdr) - self.raw = msg[offset + 16:offset + self.nl_len] + self.raw = msg[offset + 16 : offset + self.nl_len] self.error = 0 self.done = 0 -- cgit v1.2.3 From 1769e2be4baaa3273b56d1137bf67a6a747222ed Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:11 +0000 Subject: tools/net/ynl: Add 'sub-message' attribute decoding to ynl Implement the 'sub-message' attribute type in ynl. Encode support is not yet implemented. Support for sub-message selectors at a different nest level from the key attribute is not yet supported. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-5-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 55 +++++++++++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.py | 48 ++++++++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 92889298b197..44f13e383e8a 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -158,6 +158,9 @@ class SpecAttr(SpecElement): len integer, optional byte length of binary types display_hint string, hint to help choose format specifier when displaying the value + sub_message string, name of sub message type + selector string, name of attribute used to select + sub-message type is_auto_scalar bool, attr is a variable-size scalar """ @@ -173,6 +176,8 @@ class SpecAttr(SpecElement): self.byte_order = yaml.get('byte-order') self.len = yaml.get('len') self.display_hint = yaml.get('display-hint') + self.sub_message = yaml.get('sub-message') + self.selector = yaml.get('selector') self.is_auto_scalar = self.type == "sint" or self.type == "uint" @@ -278,6 +283,47 @@ class SpecStruct(SpecElement): return self.members.items() +class SpecSubMessage(SpecElement): + """ Netlink sub-message definition + + Represents a set of sub-message formats for polymorphic nlattrs + that contain type-specific sub messages. + + Attributes: + name string, name of sub-message definition + formats dict of sub-message formats indexed by match value + """ + def __init__(self, family, yaml): + super().__init__(family, yaml) + + self.formats = collections.OrderedDict() + for elem in self.yaml['formats']: + format = self.new_format(family, elem) + self.formats[format.value] = format + + def new_format(self, family, format): + return SpecSubMessageFormat(family, format) + + +class SpecSubMessageFormat(SpecElement): + """ Netlink sub-message definition + + Represents a set of sub-message formats for polymorphic nlattrs + that contain type-specific sub messages. + + Attributes: + value attribute value to match against type selector + fixed_header string, name of fixed header, or None + attr_set string, name of attribute set, or None + """ + def __init__(self, family, yaml): + super().__init__(family, yaml) + + self.value = yaml.get('value') + self.fixed_header = yaml.get('fixed-header') + self.attr_set = yaml.get('attribute-set') + + class SpecOperation(SpecElement): """Netlink Operation @@ -365,6 +411,7 @@ class SpecFamily(SpecElement): attr_sets dict of attribute sets msgs dict of all messages (index by name) + sub_msgs dict of all sub messages (index by name) ops dict of all valid requests / responses ntfs dict of all async events consts dict of all constants/enums @@ -405,6 +452,7 @@ class SpecFamily(SpecElement): jsonschema.validate(self.yaml, schema) self.attr_sets = collections.OrderedDict() + self.sub_msgs = collections.OrderedDict() self.msgs = collections.OrderedDict() self.req_by_value = collections.OrderedDict() self.rsp_by_value = collections.OrderedDict() @@ -441,6 +489,9 @@ class SpecFamily(SpecElement): def new_struct(self, elem): return SpecStruct(self, elem) + def new_sub_message(self, elem): + return SpecSubMessage(self, elem); + def new_operation(self, elem, req_val, rsp_val): return SpecOperation(self, elem, req_val, rsp_val) @@ -529,6 +580,10 @@ class SpecFamily(SpecElement): attr_set = self.new_attr_set(elem) self.attr_sets[elem['name']] = attr_set + for elem in self.yaml.get('sub-messages', []): + sub_message = self.new_sub_message(elem) + self.sub_msgs[sub_message.name] = sub_message + if self.msg_id_model == 'unified': self._dictify_ops_unified() elif self.msg_id_model == 'directional': diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index df7b1547de1f..a8db71441634 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -170,10 +170,9 @@ class NlAttr: class NlAttrs: - def __init__(self, msg): + def __init__(self, msg, offset=0): self.attrs = [] - offset = 0 while offset < len(msg): attr = NlAttr(msg, offset) offset += attr.full_len @@ -371,8 +370,8 @@ class NetlinkProtocol: fixed_header_size = 0 if ynl: op = ynl.rsp_by_value[msg.cmd()] - fixed_header_size = ynl._fixed_header_size(op) - msg.raw_attrs = NlAttrs(msg.raw[fixed_header_size:]) + fixed_header_size = ynl._fixed_header_size(op.fixed_header) + msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg def get_mcast_id(self, mcast_name, mcast_groups): @@ -549,6 +548,37 @@ class YnlFamily(SpecFamily): else: rsp[name] = [decoded] + def _resolve_selector(self, attr_spec, vals): + sub_msg = attr_spec.sub_message + if sub_msg not in self.sub_msgs: + raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}") + sub_msg_spec = self.sub_msgs[sub_msg] + + selector = attr_spec.selector + if selector not in vals: + raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'") + value = vals[selector] + if value not in sub_msg_spec.formats: + raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'") + + spec = sub_msg_spec.formats[value] + return spec + + def _decode_sub_msg(self, attr, attr_spec, rsp): + msg_format = self._resolve_selector(attr_spec, rsp) + decoded = {} + offset = 0 + if msg_format.fixed_header: + decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header)); + offset = self._fixed_header_size(msg_format.fixed_header) + if msg_format.attr_set: + if msg_format.attr_set in self.attr_sets: + subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set) + decoded.update(subdict) + else: + raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'") + return decoded + def _decode(self, attrs, space): if space: attr_space = self.attr_sets[space] @@ -586,6 +616,8 @@ class YnlFamily(SpecFamily): value = self._decode_enum(value, attr_spec) selector = self._decode_enum(selector, attr_spec) decoded = {"value": value, "selector": selector} + elif attr_spec["type"] == 'sub-message': + decoded = self._decode_sub_msg(attr, attr_spec, rsp) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') @@ -626,16 +658,16 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._fixed_header_size(op) + offset = 20 + self._fixed_header_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: del extack['bad-attr-offs'] extack['bad-attr'] = path - def _fixed_header_size(self, op): - if op.fixed_header: - fixed_header_members = self.consts[op.fixed_header].members + def _fixed_header_size(self, name): + if name: + fixed_header_members = self.consts[name].members size = 0 for m in fixed_header_members: format = NlAttr.get_format(m.type, m.byte_order) -- cgit v1.2.3 From 8b6811d96666b7ea0a53f56e65cc656d390feb19 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:12 +0000 Subject: tools/net/ynl: Add binary and pad support to structs for tc The tc netlink-raw family needs binary and pad types for several qopt C structs. Add support for them to ynl. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-6-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index a8db71441634..1e10512b2117 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -670,8 +670,11 @@ class YnlFamily(SpecFamily): fixed_header_members = self.consts[name].members size = 0 for m in fixed_header_members: - format = NlAttr.get_format(m.type, m.byte_order) - size += format.size + if m.type in ['pad', 'binary']: + size += m.len + else: + format = NlAttr.get_format(m.type, m.byte_order) + size += format.size return size else: return 0 @@ -681,12 +684,20 @@ class YnlFamily(SpecFamily): fixed_header_attrs = dict() offset = 0 for m in fixed_header_members: - format = NlAttr.get_format(m.type, m.byte_order) - [ value ] = format.unpack_from(msg.raw, offset) - offset += format.size - if m.enum: - value = self._decode_enum(value, m) - fixed_header_attrs[m.name] = value + value = None + if m.type == 'pad': + offset += m.len + elif m.type == 'binary': + value = msg.raw[offset : offset + m.len] + offset += m.len + else: + format = NlAttr.get_format(m.type, m.byte_order) + [ value ] = format.unpack_from(msg.raw, offset) + offset += format.size + if value is not None: + if m.enum: + value = self._decode_enum(value, m) + fixed_header_attrs[m.name] = value return fixed_header_attrs def handle_ntf(self, decoded): @@ -753,8 +764,13 @@ class YnlFamily(SpecFamily): fixed_header_members = self.consts[op.fixed_header].members for m in fixed_header_members: value = vals.pop(m.name) if m.name in vals else 0 - format = NlAttr.get_format(m.type, m.byte_order) - msg += format.pack(value) + if m.type == 'pad': + msg += bytearray(m.len) + elif m.type == 'binary': + msg += bytes.fromhex(value) + else: + format = NlAttr.get_format(m.type, m.byte_order) + msg += format.pack(value) for name, value in vals.items(): msg += self._add_attr(op.attr_set.name, name, value) msg = _genl_msg_finalize(msg) -- cgit v1.2.3 From 6235b3d8bc3f81e81561c151237503fcf7868a98 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:17 +0000 Subject: tools/net/ynl-gen-rst: Add sub-messages to generated docs Add a section for sub-messages to the generated .rst files. Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-11-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 8c62e040df5d..6825db92c899 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -256,6 +256,24 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: return "\n".join(lines) +def parse_sub_messages(entries: List[Dict[str, Any]]) -> str: + """Parse sub-message definitions""" + lines = [] + + for entry in entries: + lines.append(rst_section(entry["name"])) + for fmt in entry["formats"]: + value = fmt["value"] + + lines.append(rst_bullet(bold(value))) + for attr in ['fixed-header', 'attribute-set']: + if attr in fmt: + lines.append(rst_fields(attr, fmt[attr], 2)) + lines.append("\n") + + return "\n".join(lines) + + def parse_yaml(obj: Dict[str, Any]) -> str: """Format the whole YAML into a RST string""" lines = [] @@ -292,6 +310,11 @@ def parse_yaml(obj: Dict[str, Any]) -> str: lines.append(rst_subtitle("Attribute sets")) lines.append(parse_attr_sets(obj["attribute-sets"])) + # Sub-messages + if "sub-messages" in obj: + lines.append(rst_subtitle("Sub-messages")) + lines.append(parse_sub_messages(obj["sub-messages"])) + return "\n".join(lines) -- cgit v1.2.3 From e8c32339cf49cd9c2626e143c548f5897aa58b17 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:18 +0000 Subject: tools/net/ynl-gen-rst: Sort the index of generated netlink specs The index of netlink specs was being generated unsorted. Sort the output before generating the index entries. Reviewed-by: Jakub Kicinski Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-12-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 6825db92c899..68f9a9cd57cb 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -383,7 +383,7 @@ def generate_main_index_rst(output: str) -> None: index_dir = os.path.dirname(output) logging.debug("Looking for .rst files in %s", index_dir) - for filename in os.listdir(index_dir): + for filename in sorted(os.listdir(index_dir)): if not filename.endswith(".rst") or filename == "index.rst": continue lines.append(f" {filename.replace('.rst', '')}\n") -- cgit v1.2.3 From e9d7c59212e43f079dffaf65001b006da6a12580 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:19 +0000 Subject: tools/net/ynl-gen-rst: Remove bold from attribute-set headings The generated .rst for attribute-sets currently uses a sub-sub-heading for each attribute, with the attribute name in bold. This makes attributes stand out more than the attribute-set sub-headings they are part of. Remove the bold markup from attribute sub-sub-headings. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-13-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 68f9a9cd57cb..7ac5714f73a2 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -240,7 +240,7 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: lines.append(rst_section(entry["name"])) for attr in entry["attributes"]: type_ = attr.get("type") - attr_line = bold(attr["name"]) + attr_line = attr["name"] if type_: # Add the attribute type in the same line attr_line += f" ({inline(type_)})" -- cgit v1.2.3 From 9b0aa2244d9d12cc39726ffabc0609a029fda95c Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:20 +0000 Subject: tools/net/ynl-gen-rst: Remove extra indentation from generated docs The output from ynl-gen-rst.py has extra indentation that causes extra
elements to be generated in the HTML output. Reduce the indentation so that sphinx doesn't generate unnecessary
elements. Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-14-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 7ac5714f73a2..262d88f88696 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -69,7 +69,7 @@ def rst_paragraph(paragraph: str, level: int = 0) -> str: def rst_bullet(item: str, level: int = 0) -> str: """Return a formatted a bullet""" - return headroom(level) + f" - {item}" + return headroom(level) + f"- {item}" def rst_subsection(title: str) -> str: @@ -250,7 +250,7 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: for k in attr.keys(): if k in preprocessed + ignored: continue - lines.append(rst_fields(k, sanitize(attr[k]), 2)) + lines.append(rst_fields(k, sanitize(attr[k]), 0)) lines.append("\n") return "\n".join(lines) @@ -268,7 +268,7 @@ def parse_sub_messages(entries: List[Dict[str, Any]]) -> str: lines.append(rst_bullet(bold(value))) for attr in ['fixed-header', 'attribute-set']: if attr in fmt: - lines.append(rst_fields(attr, fmt[attr], 2)) + lines.append(rst_fields(attr, fmt[attr], 1)) lines.append("\n") return "\n".join(lines) -- cgit v1.2.3 From 8e432e6197cef6250dfd6fdffd41c06613c874ca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Dec 2023 09:36:01 -0800 Subject: bpf: Ensure precise is reset to false in __mark_reg_const_zero() It is safe to always start with imprecise SCALAR_VALUE register. Previously __mark_reg_const_zero() relied on caller to reset precise mark, but it's very error prone and we already missed it in a few places. So instead make __mark_reg_const_zero() reset precision always, as it's a safe default for SCALAR_VALUE. Explanation is basically the same as for why we are resetting (or rather not setting) precision in current state. If necessary, precision propagation will set it to precise correctly. As such, also remove a big comment about forward precision propagation in mark_reg_stack_read() and avoid unnecessarily setting precision to true after reading from STACK_ZERO stack. Again, precision propagation will correctly handle this, if that SCALAR_VALUE register will ever be needed to be precise. Reported-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Maxim Mikityanskiy Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231218173601.53047-1-andrii@kernel.org --- kernel/bpf/verifier.c | 29 ++++++++-------------- .../selftests/bpf/progs/verifier_spill_fill.c | 10 ++++++-- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1863826a4ac3..9456ee0ad129 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1777,10 +1777,14 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg) __mark_reg_known(reg, 0); } -static void __mark_reg_const_zero(struct bpf_reg_state *reg) +static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { __mark_reg_known(reg, 0); reg->type = SCALAR_VALUE; + /* all scalars are assumed imprecise initially (unless unprivileged, + * in which case everything is forced to be precise) + */ + reg->precise = !env->bpf_capable; } static void mark_reg_known_zero(struct bpf_verifier_env *env, @@ -4706,21 +4710,10 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env, zeros++; } if (zeros == max_off - min_off) { - /* any access_size read into register is zero extended, - * so the whole register == const_zero - */ - __mark_reg_const_zero(&state->regs[dst_regno]); - /* backtracking doesn't support STACK_ZERO yet, - * so mark it precise here, so that later - * backtracking can stop here. - * Backtracking may not need this if this register - * doesn't participate in pointer adjustment. - * Forward propagation of precise flag is not - * necessary either. This mark is only to stop - * backtracking. Any register that contributed - * to const 0 was marked precise before spill. + /* Any access_size read into register is zero extended, + * so the whole register == const_zero. */ - state->regs[dst_regno].precise = true; + __mark_reg_const_zero(env, &state->regs[dst_regno]); } else { /* have read misc data from the stack */ mark_reg_unknown(env, state->regs, dst_regno); @@ -4803,11 +4796,11 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (spill_cnt == size && tnum_is_const(reg->var_off) && reg->var_off.value == 0) { - __mark_reg_const_zero(&state->regs[dst_regno]); + __mark_reg_const_zero(env, &state->regs[dst_regno]); /* this IS register fill, so keep insn_flags */ } else if (zero_cnt == size) { /* similarly to mark_reg_stack_read(), preserve zeroes */ - __mark_reg_const_zero(&state->regs[dst_regno]); + __mark_reg_const_zero(env, &state->regs[dst_regno]); insn_flags = 0; /* not restoring original register state */ } else { mark_reg_unknown(env, state->regs, dst_regno); @@ -7963,7 +7956,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, /* switch to DRAINED state, but keep the depth unchanged */ /* mark current iter state as drained and assume returned NULL */ cur_iter->iter.state = BPF_ITER_STATE_DRAINED; - __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]); + __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 508f5d6c7347..39fe3372e0e0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -499,8 +499,14 @@ __success __msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") /* but fp-16 is spilled IMPRECISE zero const reg */ __msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") -/* and now check that precision propagation works even for such tricky case */ -__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=P0 R10=fp0 fp-16_w=0") +/* validate that assigning R2 from STACK_ZERO doesn't mark register + * precise immediately; if necessary, it will be marked precise later + */ +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +/* similarly, when R2 is assigned from spilled register, it is initially + * imprecise, but will be marked precise later once it is used in precise context + */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") __msg("11: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") -- cgit v1.2.3 From 0b4b785d1f2557678c493dc1b431ca4ad16fde9b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Dec 2023 15:23:30 -0300 Subject: perf evlist: Move event attributes to after the / when uniquefying using the PMU name When turning an event with attributes to the format including the PMU we need to move the "event:attributes" format to "event/attributes/" so that we can copy the event displayed and use it in the command line, i.e. in 'perf top' we had: 1K cpu_atom/cycles:P/ 11K cpu_core/cycles:P/ If I try to use that on the command line: # perf top -e cpu_atom/cycles:P/ event syntax error: 'cpu_atom/cycles:P/' \___ Bad event or PMU Unable to find PMU or event on a PMU of 'cpu_atom' Initial error: event syntax error: 'cpu_atom/cycles:P/' \___ unknown term 'cycles:P' for pmu 'cpu_atom' valid terms: event,pc,edge,offcore_rsp,ldlat,inv,umask,cmask,config,config1,config2,config3,name,period,freq,branch_type,time,call-graph,stack-size,no-inherit,inherit,max-stack,nr,no-overwrite,overwrite ,driver-config,percore,aux-output,aux-sample-size,metric-id,raw,legacy-cache,hardware Run 'perf list' for a list of valid events Usage: perf top [] -e, --event event selector. use 'perf list' to list available events # Tested-by: Kan Liang Cc: Hector Martin Cc: Ian Rogers Cc: Marc Zyngier Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZXxyanyZgWBTOnoK@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6f0892803c22..95f25e9fb994 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -2521,9 +2521,8 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis void evlist__uniquify_name(struct evlist *evlist) { + char *new_name, empty_attributes[2] = ":", *attributes; struct evsel *pos; - char *new_name; - int ret; if (perf_pmus__num_core_pmus() == 1) return; @@ -2535,11 +2534,17 @@ void evlist__uniquify_name(struct evlist *evlist) if (strchr(pos->name, '/')) continue; - ret = asprintf(&new_name, "%s/%s/", - pos->pmu_name, pos->name); - if (ret) { + attributes = strchr(pos->name, ':'); + if (attributes) + *attributes = '\0'; + else + attributes = empty_attributes; + + if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { free(pos->name); pos->name = new_name; + } else { + *attributes = ':'; } } } -- cgit v1.2.3 From 9a07a71ed3d23b56e1f05ea808ec5f59448fcc16 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 11:46:24 -0800 Subject: perf tests: Make DSO tests a suite rather than individual Make the DSO data tests a suite rather than individual so their output is grouped. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yang Jihong Link: https://lore.kernel.org/r/20231128194624.1419260-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 2 -- tools/perf/tests/dso-data.c | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b8c21e81a021..4a5973f9bb9b 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -62,8 +62,6 @@ static struct test_suite *generic_tests[] = { &suite__pmu, &suite__pmu_events, &suite__dso_data, - &suite__dso_data_cache, - &suite__dso_data_reopen, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index 3419a4ab5590..2d67422c1222 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -394,6 +394,15 @@ static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int sub return 0; } -DEFINE_SUITE("DSO data read", dso_data); -DEFINE_SUITE("DSO data cache", dso_data_cache); -DEFINE_SUITE("DSO data reopen", dso_data_reopen); + +static struct test_case tests__dso_data[] = { + TEST_CASE("read", dso_data), + TEST_CASE("cache", dso_data_cache), + TEST_CASE("reopen", dso_data_reopen), + { .name = NULL, } +}; + +struct test_suite suite__dso_data = { + .desc = "DSO data tests", + .test_cases = tests__dso_data, +}; -- cgit v1.2.3 From 3e0594f9f0f774918d63701dc7de634bb1bc7b1e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:02:07 -0800 Subject: perf top: Avoid repeated function calls to perf_cpu_map__nr(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a local variable to avoid repeated calls to perf_cpu_map__nr(). Reviewed-by: James Clark Signed-off-by: Adrian Hunter Signed-off-by: Ian Rogers Acked-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Rajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Link: https://lore.kernel.org/r/20231129060211.1890454-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/top.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index be7157de0451..4db3d1bd686c 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct record_opts *opts = &top->record_opts; struct target *target = &opts->target; size_t ret = 0; + int nr_cpus; if (top->samples) { samples_per_sec = top->samples / top->delay_secs; @@ -93,19 +94,17 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else ret += SNPRINTF(bf + ret, size - ret, " (all"); + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : "", + nr_cpus > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : ""); + nr_cpus, nr_cpus > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); -- cgit v1.2.3 From 67bc993446d340d4f059014f6be6ead35ec5f88b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 28 Nov 2023 22:02:11 -0800 Subject: libperf cpumap: Document perf_cpu_map__nr()'s behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf_cpu_map__nr()'s behavior around an empty CPU map is strange as it returns that there is 1 CPU. Changing code that may rely on this behavior is hard, we can at least document the behavior. Reviewed-by: James Clark Signed-off-by: Ian Rogers Acked-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexandre Ghiti Cc: Andrew Jones Cc: André Almeida Cc: Athira Rajeev Cc: Atish Patra Cc: Changbin Du Cc: Darren Hart Cc: Davidlohr Bueso Cc: Huacai Chen Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Nick Desaulniers Cc: Paolo Bonzini Cc: Paran Lee Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Will Deacon Cc: Yang Jihong Cc: Yang Li Cc: Yanteng Si Link: https://lore.kernel.org/r/20231129060211.1890454-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index dbe0a7352b64..228c6c629b0c 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -44,7 +44,18 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); +/** + * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index + * is invalid. + */ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +/** + * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a + * cpu of -1 for an invalid index, this makes an empty map + * look like it contains the "any CPU"/dummy value. Otherwise + * the result is the number CPUs in the map plus one if the + * "any CPU"/dummy value is present. + */ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); /** * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. -- cgit v1.2.3 From 5cc47ffba7b71e37d65c259f7f5778b3622f1e8f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:35 -0800 Subject: perf map: Improve map/unmap parameter names The u64 values are either absolute or relative, try to hint better in the parameter names. Suggested-by: Namhyung Kim Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 3a3b7757da5f..49756716cb13 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -105,25 +105,25 @@ static inline u64 map__dso_map_ip(const struct map *map, u64 ip) } /* dso rip -> ip */ -static inline u64 map__dso_unmap_ip(const struct map *map, u64 ip) +static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) { - return ip + map__start(map) - map__pgoff(map); + return rip + map__start(map) - map__pgoff(map); } -static inline u64 map__map_ip(const struct map *map, u64 ip) +static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) { if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) - return map__dso_map_ip(map, ip); + return map__dso_map_ip(map, ip_or_rip); else - return ip; + return ip_or_rip; } -static inline u64 map__unmap_ip(const struct map *map, u64 ip) +static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) { if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) - return map__dso_unmap_ip(map, ip); + return map__dso_unmap_ip(map, ip_or_rip); else - return ip; + return ip_or_rip; } /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ -- cgit v1.2.3 From 19b5bd9a59bed4e9937092e2b685d69656bfc606 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:36 -0800 Subject: perf maps: Add maps__for_each_map to iterate maps holding the lock The macro maps__for_each_entry is error prone as it doesn't require holding the maps lock. Add a new function that iterates the maps holding the read lock. Convert maps__find_symbol_by_name() and maps__fprintf() to use callbacks, the latter being an example of where the read lock wasn't being held. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 99 +++++++++++++++++++++++++++++++++----------------- tools/perf/util/maps.h | 3 ++ 2 files changed, 69 insertions(+), 33 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 9a011aed4b75..160a6dce54bb 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -196,6 +196,21 @@ void maps__put(struct maps *maps) RC_CHK_PUT(maps); } +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos; + int ret = 0; + + down_read(maps__lock(maps)); + maps__for_each_entry(maps, pos) { + ret = cb(pos->map, data); + if (ret) + break; + } + up_read(maps__lock(maps)); + return ret; +} + struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { struct map *map = maps__find(maps, addr); @@ -210,31 +225,40 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return NULL; } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ +struct maps__find_symbol_by_name_args { + struct map **mapp; + const char *name; struct symbol *sym; - struct map_rb_node *pos; +}; - down_read(maps__lock(maps)); +static int maps__find_symbol_by_name_cb(struct map *map, void *data) +{ + struct maps__find_symbol_by_name_args *args = data; - maps__for_each_entry(maps, pos) { - sym = map__find_symbol_by_name(pos->map, name); + args->sym = map__find_symbol_by_name(map, args->name); + if (!args->sym) + return 0; - if (sym == NULL) - continue; - if (!map__contains_symbol(pos->map, sym)) { - sym = NULL; - continue; - } - if (mapp != NULL) - *mapp = pos->map; - goto out; + if (!map__contains_symbol(map, args->sym)) { + args->sym = NULL; + return 0; } - sym = NULL; -out: - up_read(maps__lock(maps)); - return sym; + if (args->mapp != NULL) + *args->mapp = map__get(map); + return 1; +} + +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +{ + struct maps__find_symbol_by_name_args args = { + .mapp = mapp, + .name = name, + .sym = NULL, + }; + + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); + return args.sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) @@ -253,25 +277,34 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -size_t maps__fprintf(struct maps *maps, FILE *fp) -{ - size_t printed = 0; - struct map_rb_node *pos; +struct maps__fprintf_args { + FILE *fp; + size_t printed; +}; - down_read(maps__lock(maps)); +static int maps__fprintf_cb(struct map *map, void *data) +{ + struct maps__fprintf_args *args = data; - maps__for_each_entry(maps, pos) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos->map, fp); - if (verbose > 2) { - printed += dso__fprintf(map__dso(pos->map), fp); - printed += fprintf(fp, "--\n"); - } + args->printed += fprintf(args->fp, "Map:"); + args->printed += map__fprintf(map, args->fp); + if (verbose > 2) { + args->printed += dso__fprintf(map__dso(map), args->fp); + args->printed += fprintf(args->fp, "--\n"); } + return 0; +} - up_read(maps__lock(maps)); +size_t maps__fprintf(struct maps *maps, FILE *fp) +{ + struct maps__fprintf_args args = { + .fp = fp, + .printed = 0, + }; + + maps__for_each_map(maps, maps__fprintf_cb, &args); - return printed; + return args.printed; } int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index a689149be8c4..14ad95979257 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -81,6 +81,9 @@ static inline void __maps__zput(struct maps **map) #define maps__zput(map) __maps__zput(&map) +/* Iterate over map calling cb for each entry. */ +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); + static inline struct rb_root *maps__entries(struct maps *maps) { return &RC_CHK_ACCESS(maps)->entries; -- cgit v1.2.3 From bc4bc56d9d74f4593f253531edcea9ea8e30e7f1 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:37 -0800 Subject: perf events x86: Use function to add missing lock Switch from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/event.c | 103 ++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 5741ffe47312..e65b7dbe27fb 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -14,66 +14,79 @@ #if defined(__x86_64__) -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) +struct perf_event__synthesize_extra_kmaps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data) { - int rc = 0; - struct map_rb_node *pos; - struct maps *kmaps = machine__kernel_maps(machine); - union perf_event *event = zalloc(sizeof(event->mmap) + - machine->id_hdr_size); + struct perf_event__synthesize_extra_kmaps_cb_args *args = data; + union perf_event *event = args->event; + struct kmap *kmap; + size_t size; - if (!event) { - pr_debug("Not enough memory synthesizing mmap event " - "for extra kernel maps\n"); - return -1; - } + if (!__map__is_extra_kernel_map(map)) + return 0; - maps__for_each_entry(kmaps, pos) { - struct kmap *kmap; - size_t size; - struct map *map = pos->map; + kmap = map__kmap(map); - if (!__map__is_extra_kernel_map(map)) - continue; + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + args->machine->id_hdr_size; - kmap = map__kmap(map); + memset(event, 0, size); - size = sizeof(event->mmap) - sizeof(event->mmap.filename) + - PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + - machine->id_hdr_size; + event->mmap.header.type = PERF_RECORD_MMAP; - memset(event, 0, size); + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(args->machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = size; - /* - * kernel uses 0 for user space maps, see kernel/perf_event.c - * __perf_event_mmap - */ - if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; - else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pgoff = map__pgoff(map); + event->mmap.pid = args->machine->pid; - event->mmap.header.size = size; + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pgoff = map__pgoff(map); - event->mmap.pid = machine->pid; + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; - strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + return 0; +} - if (perf_tool__process_synth_event(tool, event, machine, - process) != 0) { - rc = -1; - break; - } +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc; + struct maps *kmaps = machine__kernel_maps(machine); + struct perf_event__synthesize_extra_kmaps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + .event = zalloc(sizeof(args.event->mmap) + machine->id_hdr_size), + }; + + if (!args.event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; } - free(event); + rc = maps__for_each_map(kmaps, perf_event__synthesize_extra_kmaps_cb, &args); + + free(args.event); return rc; } -- cgit v1.2.3 From 431be14b193ad19946aeb26a6016dc5b8eb6a248 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:38 -0800 Subject: perf report: Use function to add missing maps lock Switch maps__fprintf_task from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 54 +++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 17fb171e898b..178fb602bc98 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -856,27 +856,47 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } -static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +struct maps__fprintf_task_args { + int indent; + FILE *fp; + size_t printed; +}; + +static int maps__fprintf_task_cb(struct map *map, void *data) { - size_t printed = 0; - struct map_rb_node *rb_node; + struct maps__fprintf_task_args *args = data; + const struct dso *dso = map__dso(map); + u32 prot = map__prot(map); + int ret; - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - const struct dso *dso = map__dso(map); - u32 prot = map__prot(map); + ret = fprintf(args->fp, + "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + args->indent, "", map__start(map), map__end(map), + prot & PROT_READ ? 'r' : '-', + prot & PROT_WRITE ? 'w' : '-', + prot & PROT_EXEC ? 'x' : '-', + map__flags(map) ? 's' : 'p', + map__pgoff(map), + dso->id.ino, dso->name); - printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", - indent, "", map__start(map), map__end(map), - prot & PROT_READ ? 'r' : '-', - prot & PROT_WRITE ? 'w' : '-', - prot & PROT_EXEC ? 'x' : '-', - map__flags(map) ? 's' : 'p', - map__pgoff(map), - dso->id.ino, dso->name); - } + if (ret < 0) + return ret; + + args->printed += ret; + return 0; +} + +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + struct maps__fprintf_task_args args = { + .indent = indent, + .fp = fp, + .printed = 0, + }; + + maps__for_each_map(maps, maps__fprintf_task_cb, &args); - return printed; + return args.printed; } static void task__print_level(struct task *task, FILE *fp, int level) -- cgit v1.2.3 From b1928ca950386729b3bcd555efe559eaa1e2c8cc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:39 -0800 Subject: perf tests: Use function to add missing maps lock Switch loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/maps.c | 61 +++++++----- tools/perf/tests/vmlinux-kallsyms.c | 181 +++++++++++++++++++----------------- 2 files changed, 136 insertions(+), 106 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/maps.c b/tools/perf/tests/maps.c index 5bb1123a91a7..bb3fbfe5a73e 100644 --- a/tools/perf/tests/maps.c +++ b/tools/perf/tests/maps.c @@ -14,44 +14,59 @@ struct map_def { u64 end; }; +struct check_maps_cb_args { + struct map_def *merged; + unsigned int i; +}; + +static int check_maps_cb(struct map *map, void *data) +{ + struct check_maps_cb_args *args = data; + struct map_def *merged = &args->merged[args->i]; + + if (map__start(map) != merged->start || + map__end(map) != merged->end || + strcmp(map__dso(map)->name, merged->name) || + refcount_read(map__refcnt(map)) != 1) { + return 1; + } + args->i++; + return 0; +} + +static int failed_cb(struct map *map, void *data __maybe_unused) +{ + pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", + map__start(map), + map__end(map), + map__dso(map)->name, + refcount_read(map__refcnt(map))); + + return 0; +} + static int check_maps(struct map_def *merged, unsigned int size, struct maps *maps) { - struct map_rb_node *rb_node; - unsigned int i = 0; bool failed = false; if (maps__nr_maps(maps) != size) { pr_debug("Expected %d maps, got %d", size, maps__nr_maps(maps)); failed = true; } else { - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - if (map__start(map) != merged[i].start || - map__end(map) != merged[i].end || - strcmp(map__dso(map)->name, merged[i].name) || - refcount_read(map__refcnt(map)) != 1) { - failed = true; - } - i++; - } + struct check_maps_cb_args args = { + .merged = merged, + .i = 0, + }; + failed = maps__for_each_map(maps, check_maps_cb, &args); } if (failed) { pr_debug("Expected:\n"); - for (i = 0; i < size; i++) { + for (unsigned int i = 0; i < size; i++) { pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: 1\n", merged[i].start, merged[i].end, merged[i].name); } pr_debug("Got:\n"); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - - pr_debug("\tstart: %" PRIu64 " end: %" PRIu64 " name: '%s' refcnt: %d\n", - map__start(map), - map__end(map), - map__dso(map)->name, - refcount_read(map__refcnt(map))); - } + maps__for_each_map(maps, failed_cb, NULL); } return failed ? TEST_FAIL : TEST_OK; } diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 1078a93b01aa..822f893e67d5 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -112,18 +112,92 @@ static bool is_ignored_symbol(const char *name, char type) return false; } +struct test__vmlinux_matches_kallsyms_cb_args { + struct machine kallsyms; + struct map *vmlinux_map; + bool header_printed; +}; + +static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct dso *dso = map__dso(map); + /* + * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while + * the kernel will have the path for the vmlinux file being used, so use + * the short name, less descriptive but the same ("[kernel]" in both + * cases. + */ + struct map *pair = maps__find_by_name(args->kallsyms.kmaps, + (dso->kernel ? dso->short_name : dso->name)); + + if (pair) + map__set_priv(pair, 1); + else { + if (!args->header_printed) { + pr_info("WARN: Maps only in vmlinux:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + struct map *pair; + u64 mem_start = map__unmap_ip(args->vmlinux_map, map__start(map)); + u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map)); + + pair = maps__find(args->kallsyms.kmaps, mem_start); + if (pair == NULL || map__priv(pair)) + return 0; + + if (map__start(pair) == mem_start) { + struct dso *dso = map__dso(map); + + if (!args->header_printed) { + pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); + args->header_printed = true; + } + + pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", + map__start(map), map__end(map), map__pgoff(map), dso->name); + if (mem_end != map__end(pair)) + pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, + map__start(pair), map__end(pair), map__pgoff(pair)); + pr_info(" %s\n", dso->name); + map__set_priv(pair, 1); + } + return 0; +} + +static int test__vmlinux_matches_kallsyms_cb3(struct map *map, void *data) +{ + struct test__vmlinux_matches_kallsyms_cb_args *args = data; + + if (!map__priv(map)) { + if (!args->header_printed) { + pr_info("WARN: Maps only in kallsyms:\n"); + args->header_printed = true; + } + map__fprintf(map, stderr); + } + return 0; +} + static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { int err = TEST_FAIL; struct rb_node *nd; struct symbol *sym; - struct map *kallsyms_map, *vmlinux_map; - struct map_rb_node *rb_node; - struct machine kallsyms, vmlinux; + struct map *kallsyms_map; + struct machine vmlinux; struct maps *maps; u64 mem_start, mem_end; - bool header_printed; + struct test__vmlinux_matches_kallsyms_cb_args args; /* * Step 1: @@ -131,7 +205,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * Init the machines that will hold kernel, modules obtained from * both vmlinux + .ko files and from /proc/kallsyms split by modules. */ - machine__init(&kallsyms, "", HOST_KERNEL_ID); + machine__init(&args.kallsyms, "", HOST_KERNEL_ID); machine__init(&vmlinux, "", HOST_KERNEL_ID); maps = machine__kernel_maps(&vmlinux); @@ -143,7 +217,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * load /proc/kallsyms. Also create the modules maps from /proc/modules * and find the .ko files that match them in /lib/modules/`uname -r`/. */ - if (machine__create_kernel_maps(&kallsyms) < 0) { + if (machine__create_kernel_maps(&args.kallsyms) < 0) { pr_debug("machine__create_kernel_maps failed"); err = TEST_SKIP; goto out; @@ -160,7 +234,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { + if (machine__load_kallsyms(&args.kallsyms, "/proc/kallsyms") <= 0) { pr_debug("machine__load_kallsyms failed"); err = TEST_SKIP; goto out; @@ -174,7 +248,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * to see if the running kernel was relocated by checking if it has the * same value in the vmlinux file we load. */ - kallsyms_map = machine__kernel_map(&kallsyms); + kallsyms_map = machine__kernel_map(&args.kallsyms); /* * Step 5: @@ -186,7 +260,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused goto out; } - vmlinux_map = machine__kernel_map(&vmlinux); + args.vmlinux_map = machine__kernel_map(&vmlinux); /* * Step 6: @@ -213,7 +287,7 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused * in the kallsyms dso. For the ones that are in both, check its names and * end addresses too. */ - map__for_each_symbol(vmlinux_map, sym, nd) { + map__for_each_symbol(args.vmlinux_map, sym, nd) { struct symbol *pair, *first_pair; sym = rb_entry(nd, struct symbol, rb_node); @@ -221,10 +295,10 @@ static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused if (sym->start == sym->end) continue; - mem_start = map__unmap_ip(vmlinux_map, sym->start); - mem_end = map__unmap_ip(vmlinux_map, sym->end); + mem_start = map__unmap_ip(args.vmlinux_map, sym->start); + mem_end = map__unmap_ip(args.vmlinux_map, sym->end); - first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); + first_pair = machine__find_kernel_symbol(&args.kallsyms, mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -253,7 +327,8 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); + pair = machine__find_kernel_symbol_by_name(&args.kallsyms, + sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; @@ -267,7 +342,7 @@ next_pair: continue; } - } else if (mem_start == map__end(kallsyms.vmlinux_map)) { + } else if (mem_start == map__end(args.kallsyms.vmlinux_map)) { /* * Ignore aliases to _etext, i.e. to the end of the kernel text area, * such as __indirect_thunk_end. @@ -289,78 +364,18 @@ next_pair: if (verbose <= 0) goto out; - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; - struct dso *dso = map__dso(map); - /* - * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while - * the kernel will have the path for the vmlinux file being used, - * so use the short name, less descriptive but the same ("[kernel]" in - * both cases. - */ - struct map *pair = maps__find_by_name(kallsyms.kmaps, (dso->kernel ? - dso->short_name : - dso->name)); - if (pair) { - map__set_priv(pair, 1); - } else { - if (!header_printed) { - pr_info("WARN: Maps only in vmlinux:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } - - header_printed = false; - - maps__for_each_entry(maps, rb_node) { - struct map *pair, *map = rb_node->map; - - mem_start = map__unmap_ip(vmlinux_map, map__start(map)); - mem_end = map__unmap_ip(vmlinux_map, map__end(map)); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb1, &args); - pair = maps__find(kallsyms.kmaps, mem_start); - if (pair == NULL || map__priv(pair)) - continue; - - if (map__start(pair) == mem_start) { - struct dso *dso = map__dso(map); - - if (!header_printed) { - pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n"); - header_printed = true; - } - - pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", - map__start(map), map__end(map), map__pgoff(map), dso->name); - if (mem_end != map__end(pair)) - pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64, - map__start(pair), map__end(pair), map__pgoff(pair)); - pr_info(" %s\n", dso->name); - map__set_priv(pair, 1); - } - } - - header_printed = false; - - maps = machine__kernel_maps(&kallsyms); + args.header_printed = false; + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb2, &args); - maps__for_each_entry(maps, rb_node) { - struct map *map = rb_node->map; + args.header_printed = false; + maps = machine__kernel_maps(&args.kallsyms); + maps__for_each_map(maps, test__vmlinux_matches_kallsyms_cb3, &args); - if (!map__priv(map)) { - if (!header_printed) { - pr_info("WARN: Maps only in kallsyms:\n"); - header_printed = true; - } - map__fprintf(map, stderr); - } - } out: - machine__exit(&kallsyms); + machine__exit(&args.kallsyms); machine__exit(&vmlinux); return err; } -- cgit v1.2.3 From 2dc549b1dd495e7cdaa822a0af97365a8a1de840 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:40 -0800 Subject: perf machine: Use function to add missing maps lock Switch machine__map_x86_64_entry_trampolines and machine__for_each_kernel_map from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 53 ++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c5de5363b5e7..ca855fc435ac 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1285,33 +1285,46 @@ static u64 find_entry_trampoline(struct dso *dso) #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 #define X86_64_ENTRY_TRAMPOLINE 0x6000 +struct machine__map_x86_64_entry_trampolines_args { + struct maps *kmaps; + bool found; +}; + +static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) +{ + struct machine__map_x86_64_entry_trampolines_args *args = data; + struct map *dest_map; + struct kmap *kmap = __map__kmap(map); + + if (!kmap || !is_entry_trampoline(kmap->name)) + return 0; + + dest_map = maps__find(args->kmaps, map__pgoff(map)); + if (dest_map != map) + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); + + args->found = true; + return 0; +} + /* Map x86_64 PTI entry trampolines */ int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = machine__kernel_maps(machine); + struct machine__map_x86_64_entry_trampolines_args args = { + .kmaps = machine__kernel_maps(machine), + .found = false, + }; int nr_cpus_avail, cpu; - bool found = false; - struct map_rb_node *rb_node; u64 pgoff; /* * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. */ - maps__for_each_entry(kmaps, rb_node) { - struct map *dest_map, *map = rb_node->map; - struct kmap *kmap = __map__kmap(map); - - if (!kmap || !is_entry_trampoline(kmap->name)) - continue; + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); - dest_map = maps__find(kmaps, map__pgoff(map)); - if (dest_map != map) - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); - found = true; - } - if (found || machine->trampolines_mapped) + if (args.found || machine->trampolines_mapped) return 0; pgoff = find_entry_trampoline(kernel); @@ -3398,16 +3411,8 @@ int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) { struct maps *maps = machine__kernel_maps(machine); - struct map_rb_node *pos; - int err = 0; - maps__for_each_entry(maps, pos) { - err = fn(pos->map, priv); - if (err != 0) { - break; - } - } - return err; + return maps__for_each_map(maps, fn, priv); } bool machine__is_lock_function(struct machine *machine, u64 addr) -- cgit v1.2.3 From 300b53d5b819a33e2d4567cc35e1d92b4b49cd9c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:41 -0800 Subject: perf probe-event: Use function to add missing maps lock Switch kernel_get_module_map from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1a5b7fa459b2..a1a796043691 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -149,10 +149,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, return 0; } +struct kernel_get_module_map_cb_args { + const char *module; + struct map *result; +}; + +static int kernel_get_module_map_cb(struct map *map, void *data) +{ + struct kernel_get_module_map_cb_args *args = data; + struct dso *dso = map__dso(map); + const char *short_name = dso->short_name; /* short_name is "[module]" */ + u16 short_name_len = dso->short_name_len; + + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && + args->module[short_name_len - 2] == '\0') { + args->result = map__get(map); + return 1; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { - struct maps *maps = machine__kernel_maps(host_machine); - struct map_rb_node *pos; + struct kernel_get_module_map_cb_args args = { + .module = module, + .result = NULL, + }; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) @@ -164,19 +186,9 @@ static struct map *kernel_get_module_map(const char *module) return map__get(map); } - maps__for_each_entry(maps, pos) { - /* short_name is "[module]" */ - struct dso *dso = map__dso(pos->map); - const char *short_name = dso->short_name; - u16 short_name_len = dso->short_name_len; + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); - if (strncmp(short_name + 1, module, - short_name_len - 2) == 0 && - module[short_name_len - 2] == '\0') { - return map__get(pos->map); - } - } - return NULL; + return args.result; } struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) -- cgit v1.2.3 From 111350c67d15ffc284801509acdcf256d77876ca Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:42 -0800 Subject: perf symbol: Use function to add missing maps lock Switch do_validate_kcore_modules from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1cc42b8d8afb..72f03b875478 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1114,33 +1114,35 @@ out_delete_from: return ret; } +static int do_validate_kcore_modules_cb(struct map *old_map, void *data) +{ + struct rb_root *modules = data; + struct module_info *mi; + struct dso *dso; + + if (!__map__is_kmodule(old_map)) + return 0; + + dso = map__dso(old_map); + /* Module must be in memory at the same address */ + mi = find_module(dso->short_name, modules); + if (!mi || mi->start != map__start(old_map)) + return -EINVAL; + + return 0; +} + static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; - struct map_rb_node *old_node; int err; err = read_proc_modules(filename, &modules); if (err) return err; - maps__for_each_entry(kmaps, old_node) { - struct map *old_map = old_node->map; - struct module_info *mi; - struct dso *dso; + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); - if (!__map__is_kmodule(old_map)) { - continue; - } - dso = map__dso(old_map); - /* Module must be in memory at the same address */ - mi = find_module(dso->short_name, &modules); - if (!mi || mi->start != map__start(old_map)) { - err = -EINVAL; - goto out; - } - } -out: delete_modules(&modules); return err; } -- cgit v1.2.3 From 228493d0a83bc2aec7f4a25491621f9d3b6d7bf2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:43 -0800 Subject: perf synthetic-events: Use function to add missing maps lock Switch perf_event__synthesize_modules from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 118 +++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a0579c7d7b9e..3712186353fb 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -665,18 +665,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, } #endif +struct perf_event__synthesize_modules_maps_cb_args { + struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) +{ + struct perf_event__synthesize_modules_maps_cb_args *args = data; + union perf_event *event = args->event; + struct dso *dso; + size_t size; + + if (!__map__is_kmodule(map)) + return 0; + + dso = map__dso(map); + if (symbol_conf.buildid_mmap2) { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); + event->mmap2.header.size += args->machine->id_hdr_size; + event->mmap2.start = map__start(map); + event->mmap2.len = map__size(map); + event->mmap2.pid = args->machine->pid; + + memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); + } else { + size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); + event->mmap.header.size += args->machine->id_hdr_size; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pid = args->machine->pid; + + memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); + } + + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; + + return 0; +} + int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { - int rc = 0; - struct map_rb_node *pos; + int rc; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + struct perf_event__synthesize_modules_maps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + }; + size_t size = symbol_conf.buildid_mmap2 + ? sizeof(args.event->mmap2) + : sizeof(args.event->mmap); - event = zalloc(size + machine->id_hdr_size); - if (event == NULL) { + args.event = zalloc(size + machine->id_hdr_size); + if (args.event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; @@ -687,53 +743,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t * __perf_event_mmap */ if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; + args.event->header.misc = PERF_RECORD_MISC_KERNEL; else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - maps__for_each_entry(maps, pos) { - struct map *map = pos->map; - struct dso *dso; + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (!__map__is_kmodule(map)) - continue; + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); - dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap2.header.type = PERF_RECORD_MMAP2; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.start = map__start(map); - event->mmap2.len = map__size(map); - event->mmap2.pid = machine->pid; - - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); - - perf_record_mmap2__read_build_id(&event->mmap2, machine, false); - } else { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); - } - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); + free(args.event); return rc; } -- cgit v1.2.3 From 71225af17f611632226a4a2fae25235fd8dad268 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:44 -0800 Subject: perf thread: Use function to add missing maps lock Switch thread__prepare_access from loop macro maps__for_each_entry to maps__for_each_map function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index b9c2039c4230..b6986a81aa6d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -349,34 +349,33 @@ int thread__insert_map(struct thread *thread, struct map *map) return maps__insert(thread__maps(thread), map); } -static int __thread__prepare_access(struct thread *thread) +struct thread__prepare_access_maps_cb_args { + int err; + struct maps *maps; +}; + +static int thread__prepare_access_maps_cb(struct map *map, void *data) { bool initialized = false; - int err = 0; - struct maps *maps = thread__maps(thread); - struct map_rb_node *rb_node; - - down_read(maps__lock(maps)); - - maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); - if (err || initialized) - break; - } + struct thread__prepare_access_maps_cb_args *args = data; - up_read(maps__lock(maps)); + args->err = unwind__prepare_access(args->maps, map, &initialized); - return err; + return (args->err || initialized) ? 1 : 0; } static int thread__prepare_access(struct thread *thread) { - int err = 0; + struct thread__prepare_access_maps_cb_args args = { + .err = 0, + }; - if (dwarf_callchain_users) - err = __thread__prepare_access(thread); + if (dwarf_callchain_users) { + args.maps = thread__maps(thread); + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); + } - return err; + return args.err; } static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) -- cgit v1.2.3 From 9a802ddb2123e5adec394d35cd539cc0b15bc830 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 18 Dec 2023 23:39:32 +0000 Subject: kselftest/arm64: Don't probe the current VL for unsupported vector types The vec-syscfg selftest verifies that setting the VL of the currently tested vector type does not disrupt the VL of the other vector type. To do this it records the current vector length for each type but neglects to guard this with a check for that vector type actually being supported. Add one, using a helper function which we also update all the other instances of this pattern. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231218-kselftest-arm64-vec-syscfg-rdvl-v1-1-0ac22d47e81f@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 5f648b97a06f..ea9c7d47790f 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -66,6 +66,11 @@ static struct vec_data vec_data[] = { }, }; +static bool vec_type_supported(struct vec_data *data) +{ + return getauxval(data->hwcap_type) & data->hwcap; +} + static int stdio_read_integer(FILE *f, const char *what, int *val) { int n = 0; @@ -564,8 +569,11 @@ static void prctl_set_all_vqs(struct vec_data *data) return; } - for (i = 0; i < ARRAY_SIZE(vec_data); i++) + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + if (!vec_type_supported(&vec_data[i])) + continue; orig_vls[i] = vec_data[i].rdvl(); + } for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); @@ -594,7 +602,7 @@ static void prctl_set_all_vqs(struct vec_data *data) if (&vec_data[i] == data) continue; - if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap)) + if (!vec_type_supported(&vec_data[i])) continue; if (vec_data[i].rdvl() != orig_vls[i]) { @@ -765,7 +773,7 @@ int main(void) struct vec_data *data = &vec_data[i]; unsigned long supported; - supported = getauxval(data->hwcap_type) & data->hwcap; + supported = vec_type_supported(data); if (!supported) all_supported = false; -- cgit v1.2.3 From 2258b666482d3326aec8b72ec3e009a2aad9582c Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 16 Dec 2023 15:52:19 +0800 Subject: selftests: add vlan hw filter tests Add one basic vlan hw filter test. Signed-off-by: Liu Jian Reviewed-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/vlan_hw_filter.sh | 29 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100755 tools/testing/selftests/net/vlan_hw_filter.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b2aca4c5f10..9e5bf59a20bf 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh new file mode 100755 index 000000000000..7bc804ffaf7c --- /dev/null +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +ret=0 + +cleanup() { + ip netns del $NETNS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +ip netns add ${NETNS} +ip netns exec ${NETNS} ip link add bond0 type bond mode 0 +ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 +ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 +ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off +ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 +ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 +ip netns exec ${NETNS} ip link set bond_slave_1 nomaster +ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + +exit $ret -- cgit v1.2.3 From 3bf7009251f0f41cdd0188ab7b3879df81810703 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 13 Dec 2023 11:15:27 -0500 Subject: tracing/selftests: Add test to test the trace_marker Add a test that writes longs strings, some over the size of the sub buffer and make sure that the entire content is there. Link: https://lore.kernel.org/linux-trace-kernel/20231213111527.6a4c9b58@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Acked-by: Shuah Khan Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/00basic/trace_marker.tc | 82 ++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100755 tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc new file mode 100755 index 000000000000..9aa0db2b84fc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Basic tests on writing to trace_marker +# requires: trace_marker +# flags: instance + +get_buffer_data_size() { + sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_buffer_data_offset() { + sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page +} + +get_event_header_size() { + type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event` + total_bits=$((type_len+time_len+array_len)) + total_bits=$((total_bits+7)) + echo $((total_bits/8)) +} + +get_print_event_buf_offset() { + sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format +} + +event_header_size=`get_event_header_size` +print_header_size=`get_print_event_buf_offset` + +data_offset=`get_buffer_data_offset` + +marker_meta=$((event_header_size+print_header_size)) + +make_str() { + cnt=$1 + # subtract two for \n\0 as marker adds these + cnt=$((cnt-2)) + printf -- 'X%.0s' $(seq $cnt) +} + +write_buffer() { + size=$1 + + str=`make_str $size` + + # clear the buffer + echo > trace + + # write the string into the marker + echo -n $str > trace_marker + + echo $str +} + +test_buffer() { + + size=`get_buffer_data_size` + oneline_size=$((size-marker_meta)) + echo size = $size + echo meta size = $marker_meta + + # Now add a little more the meta data overhead will overflow + + str=`write_buffer $size` + + # Make sure the line was broken + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace` + + if [ "$new_str" = "$str" ]; then + exit fail; + fi + + # Make sure the entire line can be found + new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace` + + if [ "$new_str" != "$str" ]; then + exit fail; + fi +} + +test_buffer -- cgit v1.2.3 From 624dda101e03c3a3a155d51e37a7bb7607cb760b Mon Sep 17 00:00:00 2001 From: Veronika Molnarova Date: Tue, 12 Dec 2023 17:59:08 +0100 Subject: perf archive: Add new option '--all' to pack perf.data with DSOs 'perf archive' has limited functionality and people from Red Hat Global Support Services sent a request for a new feature that would pack perf.data file together with an archive with debug symbols created by the command 'perf archive' as customers were being confused and often would forget to send perf.data file with the debug symbols. With this patch 'perf archive' now accepts an option '--all' that generates archive 'perf.all-hostname-date-time.tar.bz2' that holds file 'perf.data' and a sub-tar 'perf.symbols.tar.bz2' with debug symbols. The functionality of the command 'perf archive' was not changed. Committer testing: Run 'perf record' on a Intel 14900K machine, hybrid: root@number:~# perf record -a sleep 5s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.006 MB perf.data (15427 samples) ] root@number:~# perf archive --all Now please run: $ tar xvf perf.all-number-20231219-104854.tar.bz2 && tar xvf perf.symbols.tar.bz2 -C ~/.debug wherever you need to run 'perf report' on. root@number:~# root@number:~# perf report --header-only # ======== # captured on : Tue Dec 19 10:48:48 2023 # header version : 1 # data offset : 1008 # data size : 4199936 # feat offset : 4200944 # hostname : number # os release : 6.6.4-200.fc39.x86_64 # perf version : 6.7.rc6.gca90f8e17b84 # arch : x86_64 # nrcpus online : 28 # nrcpus avail : 28 # cpudesc : Intel(R) Core(TM) i7-14700K # cpuid : GenuineIntel,6,183,1 # total memory : 32610508 kB # cmdline : /home/acme/bin/perf (deleted) record -a sleep 5s # event : name = cpu_atom/cycles/P, , id = { 5088024, 5088025, 5088026, 5088027, 5088028, 5088029, 5088030, 5088031, 5088032, 5088033, 5088034, 5088035 }, type = 0 (PERF_TYPE_HARDWARE), size> # event : name = cpu_core/cycles/P, , id = { 5088036, 5088037, 5088038, 5088039, 5088040, 5088041, 5088042, 5088043, 5088044, 5088045, 5088046, 5088047, 5088048, 5088049, 5088050, 5088051 },> # event : name = dummy:u, , id = { 5088052, 5088053, 5088054, 5088055, 5088056, 5088057, 5088058, 5088059, 5088060, 5088061, 5088062, 5088063, 5088064, 5088065, 5088066, 5088067, 5088068, 50> # CPU_TOPOLOGY info available, use -I to display # NUMA_TOPOLOGY info available, use -I to display # pmu mappings: cpu_atom = 10, cpu_core = 4, breakpoint = 5, cstate_core = 34, cstate_pkg = 35, i915 = 14, intel_bts = 11, intel_pt = 12, kprobe = 8, msr = 13, power = 36, software = 1, trac> # CACHE info available, use -I to display # time of first sample : 124739.850375 # time of last sample : 124744.855181 # sample duration : 5004.806 ms # sample duration : 5004.806 ms # MEM_TOPOLOGY info available, use -I to display # bpf_prog_info 2: bpf_prog_7cc47bbf07148bfe_hid_tail_call addr 0xffffffffc0000978 size 113 # bpf_prog_info 47: bpf_prog_713a545fe0530ce7_restrict_filesystems addr 0xffffffffc0000748 size 305 # bpf_prog_info 163: bpf_prog_bd834b0730296056 addr 0xffffffffc000df14 size 331 # bpf_prog_info 258: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc001fc08 size 264 # bpf_prog_info 259: bpf_prog_40ddf486530245f5_sd_devices addr 0xffffffffc00204bc size 318 # bpf_prog_info 260: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc0020630 size 63 # bpf_prog_info 261: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc0020688 size 63 # bpf_prog_info 262: bpf_prog_b37200ab714f0e17_sd_devices addr 0xffffffffc002072c size 110 # bpf_prog_info 263: bpf_prog_b90a282ee45cfed9_sd_devices addr 0xffffffffc00207d8 size 393 # bpf_prog_info 264: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc002099c size 264 # bpf_prog_info 265: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc0020ad4 size 63 # bpf_prog_info 266: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc0020b50 size 63 # bpf_prog_info 267: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc002d98c size 264 # bpf_prog_info 268: bpf_prog_be31ae23198a0378_sd_devices addr 0xffffffffc002dac8 size 297 # bpf_prog_info 269: bpf_prog_ccbbf91f3c6979c7_sd_devices addr 0xffffffffc002dc54 size 360 # bpf_prog_info 270: bpf_prog_3a0ef5414c2f6fca_sd_devices addr 0xffffffffc002dde8 size 456 # bpf_prog_info 271: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc0020bd4 size 63 # bpf_prog_info 272: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc00299b4 size 63 # bpf_prog_info 273: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc002dfd0 size 264 # bpf_prog_info 274: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc0029a3c size 63 # bpf_prog_info 275: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc002d71c size 63 # bpf_prog_info 276: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc002d7a8 size 63 # bpf_prog_info 277: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc002e13c size 63 # bpf_prog_info 278: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc002e1a8 size 63 # bpf_prog_info 279: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc002e234 size 63 # bpf_prog_info 280: bpf_prog_be31ae23198a0378_sd_devices addr 0xffffffffc002e2ac size 297 # bpf_prog_info 281: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc002e42c size 63 # bpf_prog_info 282: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc002e49c size 63 # bpf_prog_info 290: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc0004b18 size 264 # bpf_prog_info 294: bpf_prog_0b1566e4b83190c5_sd_devices addr 0xffffffffc0004c50 size 360 # bpf_prog_info 295: bpf_prog_ee0e253c78993a24_sd_devices addr 0xffffffffc001cfc8 size 264 # bpf_prog_info 296: bpf_prog_6deef7357e7b4530_sd_fw_egress addr 0xffffffffc0013abc size 63 # bpf_prog_info 297: bpf_prog_6deef7357e7b4530_sd_fw_ingress addr 0xffffffffc0013b24 size 63 # btf info of id 2 # btf info of id 52 # HYBRID_TOPOLOGY info available, use -I to display # cpu_atom pmu capabilities: branches=32, max_precise=3, pmu_name=alderlake_hybrid # cpu_core pmu capabilities: branches=32, max_precise=3, pmu_name=alderlake_hybrid # intel_pt pmu capabilities: topa_multiple_entries=1, psb_cyc=1, single_range_output=1, mtc_periods=249, ip_filtering=1, output_subsys=0, cr3_filtering=1, psb_periods=3f, event_trace=0, cycl> # missing features: TRACING_DATA BRANCH_STACK GROUP_DESC AUXTRACE STAT CLOCKID DIR_FORMAT COMPRESSED CPU_PMU_CAPS CLOCK_DATA # ======== # root@number:~# And then transferring it to a ARM64 machine, a Libre Computer RK3399-PC: root@number:~# scp perf.all-number-20231219-104854.tar.bz2 acme@192.168.86.114:. acme@192.168.86.114's password: perf.all-number-20231219-104854.tar.bz2 100% 145MB 85.4MB/s 00:01 root@number:~# root@number:~# ssh acme@192.168.86.114 acme@192.168.86.114's password: Welcome to Ubuntu 23.04 (GNU/Linux 6.1.68-12200-g1c40dda3081e aarch64) * Documentation: https://help.ubuntu.com * Management: https://landscape.canonical.com * Support: https://ubuntu.com/advantage Last login: Tue Dec 19 14:53:18 2023 from 192.168.86.42 acme@roc-rk3399-pc:~$ tar xvf perf.all-number-20231219-104854.tar.bz2 && tar xvf perf.symbols.tar.bz2 -C ~/.debug perf.data perf.symbols.tar.bz2 .build-id/ad/acc227f470409213308050b71f664322e2956c [kernel.kallsyms]/adacc227f470409213308050b71f664322e2956c/ [kernel.kallsyms]/adacc227f470409213308050b71f664322e2956c/kallsyms [kernel.kallsyms]/adacc227f470409213308050b71f664322e2956c/probes .build-id/76/c91f4d62baa06bb52e07e20aba36d21a8f9797 usr/lib64/libz.so.1.2.13/76c91f4d62baa06bb52e07e20aba36d21a8f9797/ .build-id/09/d7e96bc1e3f599d15ca28b36959124b2d74410 usr/lib64/librpm_sequoia.so.1/09d7e96bc1e3f599d15ca28b36959124b2d74410/ usr/lib64/librpm_sequoia.so.1/09d7e96bc1e3f599d15ca28b36959124b2d74410/elf usr/lib64/librpm_sequoia.so.1/09d7e96bc1e3f599d15ca28b36959124b2d74410/probes acme@roc-rk3399-pc:~$ acme@roc-rk3399-pc:~$ perf report --stdio | head -40 # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 6K of event 'cpu_atom/cycles/P' # Event count (approx.): 4519946621 # # Overhead Command Shared Object Symbol # ........ ............... .............................................. ......................................................................................................................................................... # 1.73% swapper [kernel.kallsyms] [k] intel_idle 1.43% sh [kernel.kallsyms] [k] next_uptodate_folio 0.94% make ld-linux-x86-64.so.2 [.] do_lookup_x 0.90% sh ld-linux-x86-64.so.2 [.] do_lookup_x 0.82% sh [kernel.kallsyms] [k] perf_event_mmap_output 0.74% sh [kernel.kallsyms] [k] filemap_map_pages 0.72% sh ld-linux-x86-64.so.2 [.] _dl_relocate_object 0.69% cc1 [kernel.kallsyms] [k] clear_page_erms 0.61% sh [kernel.kallsyms] [k] unmap_page_range 0.56% swapper [kernel.kallsyms] [k] poll_idle 0.52% cc1 ld-linux-x86-64.so.2 [.] do_lookup_x 0.47% make ld-linux-x86-64.so.2 [.] _dl_relocate_object 0.44% cc1 cc1 [.] make_node(tree_code) 0.43% sh [kernel.kallsyms] [k] native_irq_return_iret 0.38% sh libc.so.6 [.] _int_malloc 0.38% cc1 cc1 [.] decl_attributes(tree_node**, tree_node*, int, tree_node*) 0.38% sh [kernel.kallsyms] [k] clear_page_erms 0.37% cc1 cc1 [.] ht_lookup_with_hash(ht*, unsigned char const*, unsigned long, unsigned int, ht_lookup_option) 0.37% make [kernel.kallsyms] [k] perf_event_mmap_output 0.37% make ld-linux-x86-64.so.2 [.] _dl_lookup_symbol_x 0.35% sh [kernel.kallsyms] [k] _compound_head 0.35% make make [.] hash_find_slot 0.33% sh libc.so.6 [.] __strlen_avx2 0.33% cc1 cc1 [.] ggc_internal_alloc(unsigned long, void (*)(void*), unsigned long, unsigned long) 0.33% sh [kernel.kallsyms] [k] perf_iterate_ctx 0.31% make make [.] jhash_string 0.31% sh [kernel.kallsyms] [k] page_remove_rmap 0.30% cc1 libc.so.6 [.] _int_malloc 0.30% make libc.so.6 [.] _int_malloc acme@roc-rk3399-pc:~$ Signed-off-by: Veronika Molnarova Tested-by: Arnaldo Carvalho de Melo Cc: Michael Petlan Link: https://lore.kernel.org/r/20231212165909.14459-1-vmolnaro@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) mode change 100644 => 100755 tools/perf/perf-archive.sh (limited to 'tools') diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh old mode 100644 new mode 100755 index 133f0eddbcc4..a92042eae95a --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -4,9 +4,19 @@ # Arnaldo Carvalho de Melo PERF_DATA=perf.data -if [ $# -ne 0 ] ; then - PERF_DATA=$1 -fi +PERF_SYMBOLS=perf.symbols +PERF_ALL=perf.all +ALL=0 + +while [ $# -gt 0 ] ; do + if [ $1 == "--all" ]; then + ALL=1 + shift + else + PERF_DATA=$1 + shift + fi +done # # PERF_BUILDID_DIR environment variable set by perf @@ -39,9 +49,23 @@ while read build_id ; do echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST done -tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST -rm $MANIFEST $BUILDIDS || true -echo -e "Now please run:\n" -echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" -echo "wherever you need to run 'perf report' on." +if [ $ALL -eq 1 ]; then # pack perf.data file together with tar containing debug symbols + HOSTNAME=$(hostname) + DATE=$(date '+%Y%m%d-%H%M%S') + tar cjf $PERF_SYMBOLS.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + tar cjf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 $PERF_DATA $PERF_SYMBOLS.tar.bz2 + rm $PERF_SYMBOLS.tar.bz2 $MANIFEST $BUILDIDS || true + + echo -e "Now please run:\n" + echo -e "$ tar xvf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug\n" + echo "wherever you need to run 'perf report' on." +else # pack only the debug symbols + tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST + rm $MANIFEST $BUILDIDS || true + + echo -e "Now please run:\n" + echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" + echo "wherever you need to run 'perf report' on." +fi + exit 0 -- cgit v1.2.3 From d17aff807f845cf93926c28705216639c7279110 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Dec 2023 07:37:35 -0800 Subject: Revert BPF token-related functionality This patch includes the following revert (one conflicting BPF FS patch and three token patch sets, represented by merge commits): - revert 0f5d5454c723 "Merge branch 'bpf-fs-mount-options-parsing-follow-ups'"; - revert 750e785796bb "bpf: Support uid and gid when mounting bpffs"; - revert 733763285acf "Merge branch 'bpf-token-support-in-libbpf-s-bpf-object'"; - revert c35919dcce28 "Merge branch 'bpf-token-and-bpf-fs-based-delegation'". Link: https://lore.kernel.org/bpf/CAHk-=wg7JuFYwGy=GOMbRCtOL+jwSQsdUaBsRWkDVYbxipbM5A@mail.gmail.com Signed-off-by: Andrii Nakryiko --- drivers/media/rc/bpf-lirc.c | 2 +- include/linux/bpf.h | 85 +- include/linux/filter.h | 2 +- include/linux/lsm_hook_defs.h | 15 +- include/linux/security.h | 43 +- include/uapi/linux/bpf.h | 42 - kernel/bpf/Makefile | 2 +- kernel/bpf/arraymap.c | 2 +- kernel/bpf/bpf_lsm.c | 15 +- kernel/bpf/cgroup.c | 6 +- kernel/bpf/core.c | 3 +- kernel/bpf/helpers.c | 6 +- kernel/bpf/inode.c | 326 +------ kernel/bpf/syscall.c | 215 ++-- kernel/bpf/token.c | 271 ----- kernel/bpf/verifier.c | 13 +- kernel/trace/bpf_trace.c | 2 +- net/core/filter.c | 36 +- net/ipv4/bpf_tcp_ca.c | 2 +- net/netfilter/nf_bpf_link.c | 2 +- security/security.c | 101 +- security/selinux/hooks.c | 47 +- tools/include/uapi/linux/bpf.h | 42 - tools/lib/bpf/Build | 2 +- tools/lib/bpf/bpf.c | 37 +- tools/lib/bpf/bpf.h | 35 +- tools/lib/bpf/btf.c | 7 +- tools/lib/bpf/elf.c | 2 + tools/lib/bpf/features.c | 478 --------- tools/lib/bpf/libbpf.c | 573 ++++++++--- tools/lib/bpf/libbpf.h | 37 +- tools/lib/bpf/libbpf.map | 1 - tools/lib/bpf/libbpf_internal.h | 36 +- tools/lib/bpf/libbpf_probes.c | 8 +- tools/lib/bpf/str_error.h | 3 - .../selftests/bpf/prog_tests/libbpf_probes.c | 4 - .../testing/selftests/bpf/prog_tests/libbpf_str.c | 6 - tools/testing/selftests/bpf/prog_tests/token.c | 1031 -------------------- tools/testing/selftests/bpf/progs/priv_map.c | 13 - tools/testing/selftests/bpf/progs/priv_prog.c | 13 - 40 files changed, 641 insertions(+), 2925 deletions(-) delete mode 100644 kernel/bpf/token.c delete mode 100644 tools/lib/bpf/features.c delete mode 100644 tools/testing/selftests/bpf/prog_tests/token.c delete mode 100644 tools/testing/selftests/bpf/progs/priv_map.c delete mode 100644 tools/testing/selftests/bpf/progs/priv_prog.c (limited to 'tools') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 6d07693c6b9f..fe17c7f98e81 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_trace_printk: - if (bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (perfmon_capable()) return bpf_get_trace_printk_proto(); fallthrough; default: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2f54cc0436c4..7a8d4c81a39a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,10 +52,6 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; -struct bpf_token; -struct user_namespace; -struct super_block; -struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1488,7 +1484,6 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif - struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1613,31 +1608,6 @@ struct bpf_link_primer { u32 id; }; -struct bpf_mount_opts { - kuid_t uid; - kgid_t gid; - umode_t mode; - - /* BPF token-related delegation options */ - u64 delegate_cmds; - u64 delegate_maps; - u64 delegate_progs; - u64 delegate_attachs; -}; - -struct bpf_token { - struct work_struct work; - atomic64_t refcnt; - struct user_namespace *userns; - u64 allowed_cmds; - u64 allowed_maps; - u64 allowed_progs; - u64 allowed_attachs; -#ifdef CONFIG_SECURITY - void *security; -#endif -}; - struct bpf_struct_ops_value; struct btf_member; @@ -2097,7 +2067,6 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } -extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2232,26 +2201,24 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -bool bpf_token_capable(const struct bpf_token *token, int cap); - -static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) +static inline bool bpf_allow_ptr_leaks(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) +static inline bool bpf_allow_uninit_stack(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v1(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } -static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v4(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2268,21 +2235,8 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); -void bpf_token_inc(struct bpf_token *token); -void bpf_token_put(struct bpf_token *token); -int bpf_token_create(union bpf_attr *attr); -struct bpf_token *bpf_token_get_from_fd(u32 ufd); - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type); - int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); -struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, - umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2526,8 +2480,7 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, - const struct bpf_prog *prog); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2646,24 +2599,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -static inline void bpf_token_inc(struct bpf_token *token) -{ -} - -static inline void bpf_token_put(struct bpf_token *token) -{ -} - -static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline void __dev_flush(void) { } @@ -2787,7 +2722,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 12d907f17d36..68fb6c8142fe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1139,7 +1139,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) + if (bpf_jit_harden == 1 && bpf_capable()) return false; return true; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 3fdd00b452ac..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -398,17 +398,10 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) -LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) -LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) -LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) +LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/security.h b/include/linux/security.h index 00809d2d5c38..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,7 +32,6 @@ #include #include #include -#include struct linux_binprm; struct cred; @@ -2021,22 +2020,15 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_token; +struct bpf_prog_aux; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token); +extern int security_bpf_map_alloc(struct bpf_map *map); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token); -extern void security_bpf_prog_free(struct bpf_prog *prog); -extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); -extern void security_bpf_token_free(struct bpf_token *token); -extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -extern int security_bpf_token_capable(const struct bpf_token *token, int cap); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2054,8 +2046,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_map_alloc(struct bpf_map *map) { return 0; } @@ -2063,33 +2054,13 @@ static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *a static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog *prog) +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) { } - -static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return 0; -} - -static inline void security_bpf_token_free(struct bpf_token *token) -{ } - -static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - return 0; -} - -static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) -{ - return 0; -} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f4d3090efe..754e68ca8744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4ce95acfcaa7..f526b7573e97 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 13358675ff2e..0bdbbbeab155 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; - bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); + bool bypass_spec_v1 = bpf_bypass_spec_v1(); u64 array_size, mask64; struct bpf_array *array; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 63b4dc495125..e8e910395bf6 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -260,15 +260,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) BTF_SET_START(sleepable_lsm_hooks) BTF_ID(func, bpf_lsm_bpf) BTF_ID(func, bpf_lsm_bpf_map) -BTF_ID(func, bpf_lsm_bpf_map_create) -BTF_ID(func, bpf_lsm_bpf_map_free) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) BTF_ID(func, bpf_lsm_bpf_prog) -BTF_ID(func, bpf_lsm_bpf_prog_load) -BTF_ID(func, bpf_lsm_bpf_prog_free) -BTF_ID(func, bpf_lsm_bpf_token_create) -BTF_ID(func, bpf_lsm_bpf_token_free) -BTF_ID(func, bpf_lsm_bpf_token_cmd) -BTF_ID(func, bpf_lsm_bpf_token_capable) BTF_ID(func, bpf_lsm_bprm_check_security) BTF_ID(func, bpf_lsm_bprm_committed_creds) BTF_ID(func, bpf_lsm_bprm_committing_creds) @@ -363,8 +357,9 @@ BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) BTF_SET_START(untrusted_lsm_hooks) -BTF_ID(func, bpf_lsm_bpf_map_free) -BTF_ID(func, bpf_lsm_bpf_prog_free) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) +BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) #ifdef CONFIG_SECURITY_NETWORK diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 98e0e3835b28..491d20038cbe 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1630,7 +1630,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2191,7 +2191,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2348,7 +2348,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 14ace23d517b..ea6843be2616 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -682,7 +682,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || - !bpf_token_capable(fp->aux->token, CAP_BPF)) + !bpf_capable()) return; bpf_prog_ksym_set_addr(fp); @@ -2779,7 +2779,6 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); - bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 07fd4b5704f3..be72824f32b2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1679,7 +1679,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1730,7 +1730,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return NULL; switch (func_id) { @@ -1788,7 +1788,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; switch (func_id) { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4383b3d13a55..1aafb2ff2e95 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "preload/bpf_preload.h" enum bpf_type { @@ -99,9 +98,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +static struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -595,183 +594,15 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -struct bpffs_btf_enums { - const struct btf *btf; - const struct btf_type *cmd_t; - const struct btf_type *map_t; - const struct btf_type *prog_t; - const struct btf_type *attach_t; -}; - -static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) -{ - const struct btf *btf; - const struct btf_type *t; - const char *name; - int i, n; - - memset(info, 0, sizeof(*info)); - - btf = bpf_get_btf_vmlinux(); - if (IS_ERR(btf)) - return PTR_ERR(btf); - if (!btf) - return -ENOENT; - - info->btf = btf; - - for (i = 1, n = btf_nr_types(btf); i < n; i++) { - t = btf_type_by_id(btf, i); - if (!btf_type_is_enum(t)) - continue; - - name = btf_name_by_offset(btf, t->name_off); - if (!name) - continue; - - if (strcmp(name, "bpf_cmd") == 0) - info->cmd_t = t; - else if (strcmp(name, "bpf_map_type") == 0) - info->map_t = t; - else if (strcmp(name, "bpf_prog_type") == 0) - info->prog_t = t; - else if (strcmp(name, "bpf_attach_type") == 0) - info->attach_t = t; - else - continue; - - if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) - return 0; - } - - return -ESRCH; -} - -static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, - const char *prefix, const char *str, int *value) -{ - const struct btf_enum *e; - const char *name; - int i, n, pfx_len = strlen(prefix); - - *value = 0; - - if (!btf || !enum_t) - return false; - - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - - /* match symbolic name case insensitive and ignoring prefix */ - if (strcasecmp(name + pfx_len, str) == 0) { - *value = e->val; - return true; - } - } - - return false; -} - -static void seq_print_delegate_opts(struct seq_file *m, - const char *opt_name, - const struct btf *btf, - const struct btf_type *enum_t, - const char *prefix, - u64 delegate_msk, u64 any_msk) -{ - const struct btf_enum *e; - bool first = true; - const char *name; - u64 msk; - int i, n, pfx_len = strlen(prefix); - - delegate_msk &= any_msk; /* clear unknown bits */ - - if (delegate_msk == 0) - return; - - seq_printf(m, ",%s", opt_name); - if (delegate_msk == any_msk) { - seq_printf(m, "=any"); - return; - } - - if (btf && enum_t) { - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - msk = 1ULL << e->val; - if (delegate_msk & msk) { - /* emit lower-case name without prefix */ - seq_printf(m, "%c", first ? '=' : ':'); - name += pfx_len; - while (*name) { - seq_printf(m, "%c", tolower(*name)); - name++; - } - - delegate_msk &= ~msk; - first = false; - } - } - } - if (delegate_msk) - seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); -} - /* * Display the mount options in /proc/mounts. */ static int bpf_show_options(struct seq_file *m, struct dentry *root) { - struct bpf_mount_opts *opts = root->d_sb->s_fs_info; - struct inode *inode = d_inode(root); - umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; - u64 mask; - - if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) - seq_printf(m, ",uid=%u", - from_kuid_munged(&init_user_ns, inode->i_uid)); - if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) - seq_printf(m, ",gid=%u", - from_kgid_munged(&init_user_ns, inode->i_gid)); + umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; + if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - - if (opts->delegate_cmds || opts->delegate_maps || - opts->delegate_progs || opts->delegate_attachs) { - struct bpffs_btf_enums info; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - mask = (1ULL << __MAX_BPF_CMD) - 1; - seq_print_delegate_opts(m, "delegate_cmds", - info.btf, info.cmd_t, "BPF_", - opts->delegate_cmds, mask); - - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_maps", - info.btf, info.map_t, "BPF_MAP_TYPE_", - opts->delegate_maps, mask); - - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_progs", - info.btf, info.prog_t, "BPF_PROG_TYPE_", - opts->delegate_progs, mask); - - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_attachs", - info.btf, info.attach_t, "BPF_", - opts->delegate_attachs, mask); - } - return 0; } @@ -786,7 +617,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -const struct super_operations bpf_super_ops = { +static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, @@ -794,33 +625,23 @@ const struct super_operations bpf_super_ops = { }; enum { - OPT_UID, - OPT_GID, OPT_MODE, - OPT_DELEGATE_CMDS, - OPT_DELEGATE_MAPS, - OPT_DELEGATE_PROGS, - OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { - fsparam_u32 ("uid", OPT_UID), - fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), - fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), - fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), - fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), - fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; +struct bpf_mount_opts { + umode_t mode; +}; + static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct bpf_mount_opts *opts = fc->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct fs_parse_result result; - kuid_t uid; - kgid_t gid; - int opt, err; + int opt; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { @@ -841,104 +662,12 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) } switch (opt) { - case OPT_UID: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - goto bad_value; - - /* - * The requested uid must be representable in the - * filesystem's idmapping. - */ - if (!kuid_has_mapping(fc->user_ns, uid)) - goto bad_value; - - opts->uid = uid; - break; - case OPT_GID: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - goto bad_value; - - /* - * The requested gid must be representable in the - * filesystem's idmapping. - */ - if (!kgid_has_mapping(fc->user_ns, gid)) - goto bad_value; - - opts->gid = gid; - break; case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; - case OPT_DELEGATE_CMDS: - case OPT_DELEGATE_MAPS: - case OPT_DELEGATE_PROGS: - case OPT_DELEGATE_ATTACHS: { - struct bpffs_btf_enums info; - const struct btf_type *enum_t; - const char *enum_pfx; - u64 *delegate_msk, msk = 0; - char *p; - int val; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - switch (opt) { - case OPT_DELEGATE_CMDS: - delegate_msk = &opts->delegate_cmds; - enum_t = info.cmd_t; - enum_pfx = "BPF_"; - break; - case OPT_DELEGATE_MAPS: - delegate_msk = &opts->delegate_maps; - enum_t = info.map_t; - enum_pfx = "BPF_MAP_TYPE_"; - break; - case OPT_DELEGATE_PROGS: - delegate_msk = &opts->delegate_progs; - enum_t = info.prog_t; - enum_pfx = "BPF_PROG_TYPE_"; - break; - case OPT_DELEGATE_ATTACHS: - delegate_msk = &opts->delegate_attachs; - enum_t = info.attach_t; - enum_pfx = "BPF_"; - break; - default: - return -EINVAL; - } - - while ((p = strsep(¶m->string, ":"))) { - if (strcmp(p, "any") == 0) { - msk |= ~0ULL; - } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { - msk |= 1ULL << val; - } else { - err = kstrtou64(p, 0, &msk); - if (err) - return err; - } - } - - /* Setting delegation mount options requires privileges */ - if (msk && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - *delegate_msk |= msk; - break; - } - default: - /* ignore unknown mount options */ - break; } return 0; - -bad_value: - return invalfc(fc, "Bad value for '%s'", param->key); } struct bpf_preload_ops *bpf_preload_ops; @@ -1010,14 +739,10 @@ out: static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; - struct bpf_mount_opts *opts = sb->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct inode *inode; int ret; - /* Mounting an instance of BPF FS requires privileges */ - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) - return -EPERM; - ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; @@ -1025,8 +750,6 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_op = &bpf_super_ops; inode = sb->s_root->d_inode; - inode->i_uid = opts->uid; - inode->i_gid = opts->gid; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; populate_bpffs(sb->s_root); @@ -1041,7 +764,7 @@ static int bpf_get_tree(struct fs_context *fc) static void bpf_free_fc(struct fs_context *fc) { - kfree(fc->s_fs_info); + kfree(fc->fs_private); } static const struct fs_context_operations bpf_context_ops = { @@ -1062,35 +785,18 @@ static int bpf_init_fs_context(struct fs_context *fc) return -ENOMEM; opts->mode = S_IRWXUGO; - opts->uid = current_fsuid(); - opts->gid = current_fsgid(); - - /* start out with no BPF token delegation enabled */ - opts->delegate_cmds = 0; - opts->delegate_maps = 0; - opts->delegate_progs = 0; - opts->delegate_attachs = 0; - fc->s_fs_info = opts; + fc->fs_private = opts; fc->ops = &bpf_context_ops; return 0; } -static void bpf_kill_super(struct super_block *sb) -{ - struct bpf_mount_opts *opts = sb->s_fs_info; - - kill_litter_super(sb); - kfree(opts); -} - static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, - .kill_sb = bpf_kill_super, - .fs_flags = FS_USERNS_MOUNT, + .kill_sb = kill_litter_super, }; static int __init bpf_init(void) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8faa1a20edf8..1bf9805ee185 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1011,8 +1011,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, struct bpf_token *token, - const struct btf *btf, u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1040,7 +1040,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_token_capable(token, CAP_BPF)) { + if (!bpf_capable()) { ret = -EPERM; goto free_map_tab; } @@ -1123,17 +1123,11 @@ free_map_tab: return ret; } -static bool bpf_net_capable(void) -{ - return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); -} - -#define BPF_MAP_CREATE_LAST_FIELD map_token_fd +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; - struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; struct bpf_map *map; @@ -1184,32 +1178,14 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; - if (attr->map_token_fd) { - token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - - /* if current token doesn't grant map creation permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || - !bpf_token_allow_map_type(token, attr->map_type)) { - bpf_token_put(token); - token = NULL; - } - } - - err = -EPERM; - /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; /* check privileged map type permissions */ switch (map_type) { @@ -1242,27 +1218,25 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (!bpf_capable()) + return -EPERM; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; break; default: WARN(1, "unsupported map type %d", map_type); - goto put_token; + return -EPERM; } map = ops->map_alloc(attr); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto put_token; - } + if (IS_ERR(map)) + return PTR_ERR(map); map->ops = ops; map->map_type = map_type; @@ -1299,7 +1273,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, token, btf, attr->btf_key_type_id, + err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1311,16 +1285,15 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_create(map, attr, token); + err = security_bpf_map_alloc(map); if (err) - goto free_map_sec; + goto free_map; err = bpf_map_alloc_id(map); if (err) goto free_map_sec; bpf_map_save_memcg(map); - bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1341,8 +1314,6 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); -put_token: - bpf_token_put(token); return err; } @@ -2173,7 +2144,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) kvfree(aux->func_info); kfree(aux->func_info_aux); free_uid(aux->user); - security_bpf_prog_free(aux->prog); + security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -2619,15 +2590,13 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd +#define BPF_PROG_LOAD_LAST_FIELD log_true_size static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; - struct bpf_token *token = NULL; - bool bpf_cap; int err; char license[128]; @@ -2644,31 +2613,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; - bpf_prog_load_fixup_attach_type(attr); - - if (attr->prog_token_fd) { - token = bpf_token_get_from_fd(attr->prog_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - /* if current token doesn't grant prog loading permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || - !bpf_token_allow_prog_type(token, attr->prog_type, - attr->expected_attach_type)) { - bpf_token_put(token); - token = NULL; - } - } - - bpf_cap = bpf_token_capable(token, CAP_BPF); - err = -EPERM; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2677,23 +2625,21 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_cap) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { - err = -E2BIG; - goto put_token; - } + attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) + return -E2BIG; if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; - if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; - if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) - goto put_token; + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + if (is_perfmon_prog_type(type) && !perfmon_capable()) + return -EPERM; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2703,33 +2649,27 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return -EINVAL; if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - err = -ENOTSUPP; - goto put_token; + return -ENOTSUPP; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) { - err = PTR_ERR(attach_btf); - goto put_token; - } - if (!attach_btf) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return PTR_ERR(attach_btf); + if (!attach_btf) + return -EINVAL; btf_get(attach_btf); } + bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2737,8 +2677,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -EINVAL; } /* plain bpf_prog allocation */ @@ -2748,8 +2687,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -ENOMEM; } prog->expected_attach_type = attr->expected_attach_type; @@ -2760,9 +2698,9 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; - /* move token into prog->aux, reuse taken refcnt */ - prog->aux->token = token; - token = NULL; + err = security_bpf_prog_alloc(prog->aux); + if (err) + goto free_prog; prog->aux->user = get_current_user(); prog->len = attr->insn_cnt; @@ -2771,12 +2709,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (copy_from_bpfptr(prog->insns, make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 0) - goto free_prog; + goto free_prog_sec; /* copy eBPF program license from user space */ if (strncpy_from_bpfptr(license, make_bpfptr(attr->license, uattr.is_kernel), sizeof(license) - 1) < 0) - goto free_prog; + goto free_prog_sec; license[sizeof(license) - 1] = 0; /* eBPF programs must be GPL compatible to use GPL-ed functions */ @@ -2790,29 +2728,25 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); if (err) - goto free_prog; + goto free_prog_sec; } if (type == BPF_PROG_TYPE_EXT && dst_prog && bpf_prog_is_dev_bound(dst_prog->aux)) { err = bpf_prog_dev_bound_inherit(prog, dst_prog); if (err) - goto free_prog; + goto free_prog_sec; } /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) - goto free_prog; + goto free_prog_sec; prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name)); if (err < 0) - goto free_prog; - - err = security_bpf_prog_load(prog, attr, token); - if (err) goto free_prog_sec; /* run eBPF verifier */ @@ -2858,16 +2792,13 @@ free_used_maps: */ __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); return err; - free_prog_sec: - security_bpf_prog_free(prog); -free_prog: free_uid(prog->aux->user); + security_bpf_prog_free(prog->aux); +free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); -put_token: - bpf_token_put(token); return err; } @@ -3857,7 +3788,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. */ @@ -4060,7 +3991,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - if (!bpf_net_capable()) + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (CHECK_ATTR(BPF_PROG_QUERY)) return -EINVAL; @@ -4828,31 +4759,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd +#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { - struct bpf_token *token = NULL; - if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (attr->btf_token_fd) { - token = bpf_token_get_from_fd(attr->btf_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { - bpf_token_put(token); - token = NULL; - } - } - - if (!bpf_token_capable(token, CAP_BPF)) { - bpf_token_put(token); + if (!bpf_capable()) return -EPERM; - } - - bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } @@ -5470,20 +5385,6 @@ out_prog_put: return ret; } -#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd - -static int token_create(union bpf_attr *attr) -{ - if (CHECK_ATTR(BPF_TOKEN_CREATE)) - return -EINVAL; - - /* no flags are supported yet */ - if (attr->token_create.flags) - return -EINVAL; - - return bpf_token_create(attr); -} - static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5617,9 +5518,6 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; - case BPF_TOKEN_CREATE: - err = token_create(&attr); - break; default: err = -EINVAL; break; @@ -5726,7 +5624,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { const struct bpf_func_proto * __weak tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } BPF_CALL_1(bpf_sys_close, u32, fd) @@ -5776,8 +5674,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return !bpf_token_capable(prog->aux->token, CAP_PERFMON) - ? NULL : &bpf_sys_bpf_proto; + return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c deleted file mode 100644 index a86fccd57e2d..000000000000 --- a/kernel/bpf/token.c +++ /dev/null @@ -1,271 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - /* BPF token allows ns_capable() level of capabilities, but only if - * token's userns is *exactly* the same as current user's userns - */ - if (token && current_user_ns() == token->userns) { - if (ns_capable(token->userns, cap) || - (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN))) - return security_bpf_token_capable(token, cap) == 0; - } - /* otherwise fallback to capable() checks */ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -void bpf_token_inc(struct bpf_token *token) -{ - atomic64_inc(&token->refcnt); -} - -static void bpf_token_free(struct bpf_token *token) -{ - security_bpf_token_free(token); - put_user_ns(token->userns); - kvfree(token); -} - -static void bpf_token_put_deferred(struct work_struct *work) -{ - struct bpf_token *token = container_of(work, struct bpf_token, work); - - bpf_token_free(token); -} - -void bpf_token_put(struct bpf_token *token) -{ - if (!token) - return; - - if (!atomic64_dec_and_test(&token->refcnt)) - return; - - INIT_WORK(&token->work, bpf_token_put_deferred); - schedule_work(&token->work); -} - -static int bpf_token_release(struct inode *inode, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - - bpf_token_put(token); - return 0; -} - -static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - u64 mask; - - BUILD_BUG_ON(__MAX_BPF_CMD >= 64); - mask = (1ULL << __MAX_BPF_CMD) - 1; - if ((token->allowed_cmds & mask) == mask) - seq_printf(m, "allowed_cmds:\tany\n"); - else - seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); - - BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - if ((token->allowed_maps & mask) == mask) - seq_printf(m, "allowed_maps:\tany\n"); - else - seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); - - BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - if ((token->allowed_progs & mask) == mask) - seq_printf(m, "allowed_progs:\tany\n"); - else - seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); - - BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - if ((token->allowed_attachs & mask) == mask) - seq_printf(m, "allowed_attachs:\tany\n"); - else - seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); -} - -#define BPF_TOKEN_INODE_NAME "bpf-token" - -static const struct inode_operations bpf_token_iops = { }; - -static const struct file_operations bpf_token_fops = { - .release = bpf_token_release, - .show_fdinfo = bpf_token_show_fdinfo, -}; - -int bpf_token_create(union bpf_attr *attr) -{ - struct bpf_mount_opts *mnt_opts; - struct bpf_token *token = NULL; - struct user_namespace *userns; - struct inode *inode; - struct file *file; - struct path path; - struct fd f; - umode_t mode; - int err, fd; - - f = fdget(attr->token_create.bpffs_fd); - if (!f.file) - return -EBADF; - - path = f.file->f_path; - path_get(&path); - fdput(f); - - if (path.dentry != path.mnt->mnt_sb->s_root) { - err = -EINVAL; - goto out_path; - } - if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { - err = -EINVAL; - goto out_path; - } - err = path_permission(&path, MAY_ACCESS); - if (err) - goto out_path; - - userns = path.dentry->d_sb->s_user_ns; - /* - * Enforce that creators of BPF tokens are in the same user - * namespace as the BPF FS instance. This makes reasoning about - * permissions a lot easier and we can always relax this later. - */ - if (current_user_ns() != userns) { - err = -EPERM; - goto out_path; - } - if (!ns_capable(userns, CAP_BPF)) { - err = -EPERM; - goto out_path; - } - - mnt_opts = path.dentry->d_sb->s_fs_info; - if (mnt_opts->delegate_cmds == 0 && - mnt_opts->delegate_maps == 0 && - mnt_opts->delegate_progs == 0 && - mnt_opts->delegate_attachs == 0) { - err = -ENOENT; /* no BPF token delegation is set up */ - goto out_path; - } - - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out_path; - } - - inode->i_op = &bpf_token_iops; - inode->i_fop = &bpf_token_fops; - clear_nlink(inode); /* make sure it is unlinked */ - - file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); - if (IS_ERR(file)) { - iput(inode); - err = PTR_ERR(file); - goto out_path; - } - - token = kvzalloc(sizeof(*token), GFP_USER); - if (!token) { - err = -ENOMEM; - goto out_file; - } - - atomic64_set(&token->refcnt, 1); - - /* remember bpffs owning userns for future ns_capable() checks */ - token->userns = get_user_ns(userns); - - token->allowed_cmds = mnt_opts->delegate_cmds; - token->allowed_maps = mnt_opts->delegate_maps; - token->allowed_progs = mnt_opts->delegate_progs; - token->allowed_attachs = mnt_opts->delegate_attachs; - - err = security_bpf_token_create(token, attr, &path); - if (err) - goto out_token; - - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - err = fd; - goto out_token; - } - - file->private_data = token; - fd_install(fd, file); - - path_put(&path); - return fd; - -out_token: - bpf_token_free(token); -out_file: - fput(file); -out_path: - path_put(&path); - return err; -} - -struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - struct fd f = fdget(ufd); - struct bpf_token *token; - - if (!f.file) - return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_token_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - token = f.file->private_data; - bpf_token_inc(token); - fdput(f); - - return token; -} - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - /* BPF token can be used only within exactly the same userns in which - * it was created - */ - if (!token || current_user_ns() != token->userns) - return false; - if (!(token->allowed_cmds & (1ULL << cmd))) - return false; - return security_bpf_token_cmd(token, cmd) == 0; -} - -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) -{ - if (!token || type >= __MAX_BPF_MAP_TYPE) - return false; - - return token->allowed_maps & (1ULL << type); -} - -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type) -{ - if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) - return false; - - return (token->allowed_progs & (1ULL << prog_type)) && - (token->allowed_attachs & (1ULL << attach_type)); -} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9456ee0ad129..4ceec8c2a484 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20594,12 +20594,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); - - env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); - env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); - env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); - env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); - env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); + is_priv = bpf_capable(); bpf_get_btf_vmlinux(); @@ -20631,6 +20626,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_uninit_stack = bpf_allow_uninit_stack(); + env->bypass_spec_v1 = bpf_bypass_spec_v1(); + env->bypass_spec_v4 = bpf_bypass_spec_v4(); + env->bpf_capable = bpf_capable(); + if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 492d60e9c480..7ac6c52b25eb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/core/filter.c b/net/core/filter.c index 3cc52b82bab8..24061f29c9dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,7 +87,7 @@ #include "dev.h" static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); +bpf_sk_base_func_proto(enum bpf_func_id func_id); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { @@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) @@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_lookup_tcp_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; } @@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; default: @@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = { }; static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_sk_base_func_proto(enum bpf_func_id func_id) { const struct bpf_func_proto *func; @@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; return func; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 634cfafa583d..ae8b15e6896f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -191,7 +191,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 5257d5e7eb09..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } const struct bpf_verifier_ops netfilter_verifier_ops = { diff --git a/security/security.c b/security/security.c index 088a79c35c26..dcb3e7014f9b 100644 --- a/security/security.c +++ b/security/security.c @@ -5167,87 +5167,29 @@ int security_bpf_prog(struct bpf_prog *prog) } /** - * security_bpf_map_create() - Check if BPF map creation is allowed - * @map: BPF map object - * @attr: BPF syscall attributes used to create BPF map - * @token: BPF token used to grant user access - * - * Do a check when the kernel creates a new BPF map. This is also the - * point where LSM blob is allocated for LSMs that need them. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_map_create, 0, map, attr, token); -} - -/** - * security_bpf_prog_load() - Check if loading of BPF program is allowed - * @prog: BPF program object - * @attr: BPF syscall attributes used to create BPF program - * @token: BPF token used to grant user access to BPF subsystem - * - * Perform an access control check when the kernel loads a BPF program and - * allocates associated BPF program object. This hook is also responsible for - * allocating any required LSM state for the BPF program. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_prog_load, 0, prog, attr, token); -} - -/** - * security_bpf_token_create() - Check if creating of BPF token is allowed - * @token: BPF token object - * @attr: BPF syscall attributes used to create BPF token - * @path: path pointing to BPF FS mount point from which BPF token is created - * - * Do a check when the kernel instantiates a new BPF token object from BPF FS - * instance. This is also the point where LSM blob can be allocated for LSMs. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return call_int_hook(bpf_token_create, 0, token, attr, path); -} - -/** - * security_bpf_token_cmd() - Check if BPF token is allowed to delegate - * requested BPF syscall command - * @token: BPF token object - * @cmd: BPF syscall command requested to be delegated by BPF token + * security_bpf_map_alloc() - Allocate a bpf map LSM blob + * @map: bpf map * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF syscall command. + * Initialize the security field inside bpf map. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +int security_bpf_map_alloc(struct bpf_map *map) { - return call_int_hook(bpf_token_cmd, 0, token, cmd); + return call_int_hook(bpf_map_alloc_security, 0, map); } /** - * security_bpf_token_capable() - Check if BPF token is allowed to delegate - * requested BPF-related capability - * @token: BPF token object - * @cap: capabilities requested to be delegated by BPF token + * security_bpf_prog_alloc() - Allocate a bpf program LSM blob + * @aux: bpf program aux info struct * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF-related capabilities. + * Initialize the security field inside bpf program. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_capable(const struct bpf_token *token, int cap) +int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { - return call_int_hook(bpf_token_capable, 0, token, cap); + return call_int_hook(bpf_prog_alloc_security, 0, aux); } /** @@ -5258,29 +5200,18 @@ int security_bpf_token_capable(const struct bpf_token *token, int cap) */ void security_bpf_map_free(struct bpf_map *map) { - call_void_hook(bpf_map_free, map); -} - -/** - * security_bpf_prog_free() - Free a BPF program's LSM blob - * @prog: BPF program struct - * - * Clean up the security information stored inside BPF program. - */ -void security_bpf_prog_free(struct bpf_prog *prog) -{ - call_void_hook(bpf_prog_free, prog); + call_void_hook(bpf_map_free_security, map); } /** - * security_bpf_token_free() - Free a BPF token's LSM blob - * @token: BPF token struct + * security_bpf_prog_free() - Free a bpf program's LSM blob + * @aux: bpf program aux info struct * - * Clean up the security information stored inside BPF token. + * Clean up the security information stored inside bpf prog. */ -void security_bpf_token_free(struct bpf_token *token) +void security_bpf_prog_free(struct bpf_prog_aux *aux) { - call_void_hook(bpf_token_free, token); + call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1501e95366a1..feda711c6b7b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6783,8 +6783,7 @@ static int selinux_bpf_prog(struct bpf_prog *prog) BPF__PROG_RUN, NULL); } -static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static int selinux_bpf_map_alloc(struct bpf_map *map) { struct bpf_security_struct *bpfsec; @@ -6806,8 +6805,7 @@ static void selinux_bpf_map_free(struct bpf_map *map) kfree(bpfsec); } -static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) +static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux) { struct bpf_security_struct *bpfsec; @@ -6816,39 +6814,16 @@ static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, return -ENOMEM; bpfsec->sid = current_sid(); - prog->aux->security = bpfsec; + aux->security = bpfsec; return 0; } -static void selinux_bpf_prog_free(struct bpf_prog *prog) +static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) { - struct bpf_security_struct *bpfsec = prog->aux->security; + struct bpf_security_struct *bpfsec = aux->security; - prog->aux->security = NULL; - kfree(bpfsec); -} - -static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - struct bpf_security_struct *bpfsec; - - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - - bpfsec->sid = current_sid(); - token->security = bpfsec; - - return 0; -} - -static void selinux_bpf_token_free(struct bpf_token *token) -{ - struct bpf_security_struct *bpfsec = token->security; - - token->security = NULL; + aux->security = NULL; kfree(bpfsec); } #endif @@ -7204,9 +7179,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(bpf, selinux_bpf), LSM_HOOK_INIT(bpf_map, selinux_bpf_map), LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog), - LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free), - LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free), - LSM_HOOK_INIT(bpf_token_free, selinux_bpf_token_free), + LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), + LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif #ifdef CONFIG_PERF_EVENTS @@ -7263,9 +7237,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init), #endif #ifdef CONFIG_BPF_SYSCALL - LSM_HOOK_INIT(bpf_map_create, selinux_bpf_map_create), - LSM_HOOK_INIT(bpf_prog_load, selinux_bpf_prog_load), - LSM_HOOK_INIT(bpf_token_create, selinux_bpf_token_create), + LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc), + LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc), #endif #ifdef CONFIG_PERF_EVENTS LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0545201b55f..7f24d898efbb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..2d0c282c8588 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0ad8e532b3cf..9dc9625651dc 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(int token_fd) +int probe_memcg_account(void) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,7 +120,6 @@ int probe_memcg_account(int token_fd) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); - attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -147,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -170,7 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; @@ -182,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -199,8 +198,6 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); - attr.map_token_fd = OPTS_GET(opts, token_fd, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -235,7 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -264,9 +261,8 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); - attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -1186,7 +1182,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1211,8 +1207,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; - attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); - /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1293,20 +1287,3 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } - -int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) -{ - const size_t attr_sz = offsetofend(union bpf_attr, token_create); - union bpf_attr attr; - int fd; - - if (!OPTS_VALID(opts, bpf_token_create_opts)) - return libbpf_err(-EINVAL); - - memset(&attr, 0, attr_sz); - attr.token_create.bpffs_fd = bpffs_fd; - attr.token_create.flags = OPTS_GET(opts, flags, 0); - - fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); - return libbpf_err_errno(fd); -} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 991b86bfe7e4..d0f53772bdc0 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,11 +51,8 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; - - __u32 token_fd; - size_t :0; }; -#define bpf_map_create_opts__last_field token_fd +#define bpf_map_create_opts__last_field map_ifindex LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -105,10 +102,9 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field log_true_size LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -134,10 +130,9 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field token_fd +#define bpf_btf_load_opts__last_field log_true_size LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); @@ -645,30 +640,6 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); -struct bpf_token_create_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 flags; - size_t :0; -}; -#define bpf_token_create_opts__last_field flags - -/** - * @brief **bpf_token_create()** creates a new instance of BPF token derived - * from specified BPF FS mount point. - * - * BPF token created with this API can be passed to bpf() syscall for - * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. - * - * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token - * instance. - * @param opts optional BPF token creation options, can be NULL - * - * @return BPF token FD > 0, on success; negative error code, otherwise (errno - * is also set to the error code) - */ -LIBBPF_API int bpf_token_create(int bpffs_fd, - struct bpf_token_create_opts *opts); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 63033c334320..ee95fd379d4d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,9 +1317,7 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1369,7 +1367,6 @@ retry_load: opts.log_level = log_level; } - opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1397,7 +1394,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index c92e02394159..b02faec748a5 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,6 +11,8 @@ #include "libbpf_internal.h" #include "str_error.h" +#define STRERR_BUFSIZE 128 + /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c deleted file mode 100644 index ce98a334be21..000000000000 --- a/tools/lib/bpf/features.c +++ /dev/null @@ -1,478 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#include -#include -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_common.h" -#include "libbpf_internal.h" -#include "str_error.h" - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(int token_fd) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - attr.prog_token_fd = token_fd; - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func_global(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_datasec(int token_fd) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_float(int token_fd) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_decl_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_type_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_array_mmap(int token_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .map_flags = BPF_F_MMAPABLE, - .token_fd = token_fd, - ); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .token_fd = token_fd, - ); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_prog_bind_map(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), &opts); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - .token_fd = token_fd, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(int token_fd) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -typedef int (*feature_probe_fn)(int /* token_fd */); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(cache->token_fd); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b5ff9508e18..ac54ebc0629f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,8 +59,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" - #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -695,10 +693,6 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct kern_feature_cache *feat_cache; - char *token_path; - int token_fd; - char path[]; }; @@ -2198,7 +2192,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = BPF_FS_DEFAULT_PATH; + path = "/sys/fs/bpf"; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3285,7 +3279,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0, obj->token_fd); + obj->log_level ? 1 : 0); } if (sanitize) { if (!err) { @@ -4608,63 +4602,6 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } -static int bpf_object_prepare_token(struct bpf_object *obj) -{ - const char *bpffs_path; - int bpffs_fd = -1, token_fd, err; - bool mandatory; - enum libbpf_print_level level; - - /* token is already set up */ - if (obj->token_fd > 0) - return 0; - /* token is explicitly prevented */ - if (obj->token_fd < 0) { - pr_debug("object '%s': token is prevented, skipping...\n", obj->name); - /* reset to zero to avoid extra checks during map_create and prog_load steps */ - obj->token_fd = 0; - return 0; - } - - mandatory = obj->token_path != NULL; - level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; - - bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; - bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); - if (bpffs_fd < 0) { - err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? err : 0; - } - - token_fd = bpf_token_create(bpffs_fd, 0); - close(bpffs_fd); - if (token_fd < 0) { - if (!mandatory && token_fd == -ENOENT) { - pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", - obj->name, bpffs_path); - return 0; - } - __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", - obj->name, token_fd, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? token_fd : 0; - } - - obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); - if (!obj->feat_cache) { - close(token_fd); - return -ENOMEM; - } - - obj->token_fd = token_fd; - obj->feat_cache->token_fd = token_fd; - - return 0; -} - static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4674,7 +4611,6 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4684,9 +4620,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4701,18 +4637,462 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_syscall_wrapper(void); + +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; + enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (obj->token_fd) - return feat_supported(obj->feat_cache, feat_id); + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(feat->res, FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(feat->res, FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(feat->res, FEAT_MISSING); + } + } - return feat_supported(NULL, feat_id); + return READ_ONCE(feat->res) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -4831,7 +5211,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -6667,7 +7046,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7129,10 +7507,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path, *token_path; + const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_object *obj; char tmp_name[64]; - int err, token_fd; + int err; char *log_buf; size_t log_size; __u32 log_level; @@ -7166,28 +7544,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); - token_path = OPTS_GET(opts, bpf_token_path, NULL); - token_fd = OPTS_GET(opts, bpf_token_fd, -1); - /* non-empty token path can't be combined with invalid token FD */ - if (token_path && token_path[0] != '\0' && token_fd < 0) - return ERR_PTR(-EINVAL); - /* empty token path can't be combined with valid token FD */ - if (token_path && token_path[0] == '\0' && token_fd > 0) - return ERR_PTR(-EINVAL); - /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, - * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as - * bpf_token_path option - */ - if (token_fd == 0 && !token_path) - token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); - /* empty token_path is equivalent to invalid token_fd */ - if (token_path && token_path[0] == '\0') { - token_path = NULL; - token_fd = -1; - } - if (token_path && strlen(token_path) >= PATH_MAX) - return ERR_PTR(-ENAMETOOLONG); - obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7196,19 +7552,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; - obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); - if (token_fd > 0 && obj->token_fd < 0) { - err = -errno; - goto out; - } - if (token_path) { - obj->token_path = strdup(token_path); - if (!obj->token_path) { - err = -ENOMEM; - goto out; - } - } - btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7719,8 +8062,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object_prepare_token(obj); - err = err ? : bpf_object__probe_loading(obj); + err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); @@ -8257,11 +8599,6 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - zfree(&obj->feat_cache); - zfree(&obj->token_path); - if (obj->token_fd > 0) - close(obj->token_fd); - free(obj); } @@ -10275,7 +10612,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(int token_fd) +static int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 916904bd2a7a..6cd9c501624f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,45 +177,10 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; - /* FD of a BPF token instantiated by user through bpf_token_create() - * API. BPF object will keep dup()'ed FD internally, so passed token - * FD can be closed after BPF object/skeleton open step. - * - * Setting bpf_token_fd to negative value disables libbpf's automatic - * attempt to create BPF token from default BPF FS mount point - * (/sys/fs/bpf), in case this default behavior is undesirable. - * - * If bpf_token_path and bpf_token_fd are not specified, libbpf will - * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will - * be taken as a value of bpf_token_path option and will force libbpf - * to either create BPF token from provided custom BPF FS path, or - * will disable implicit BPF token creation, if envvar value is an - * empty string. - * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - int bpf_token_fd; - /* Path to BPF FS mount point to derive BPF token from. - * - * Created BPF token will be used for all bpf() syscall operations - * that accept BPF token (e.g., map creation, BTF and program loads, - * etc) automatically within instantiated BPF object. - * - * Setting bpf_token_path option to empty string disables libbpf's - * automatic attempt to create BPF token from default BPF FS mount - * point (/sys/fs/bpf), in case this default behavior is undesirable. - * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field bpf_token_path +#define bpf_object_open_opts__last_field kernel_log_level /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index df7657b65c47..91c5aef7dae7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,7 +401,6 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; - bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4cda32298c49..b5d334754e5d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,32 +360,15 @@ enum kern_feature_id { __FEAT_CNT, }; -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; - int token_fd; -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); +int probe_memcg_account(void); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); - -int probe_kern_syscall_wrapper(int token_fd); -int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd); + const char *str_sec, size_t str_len); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -549,17 +532,6 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } -/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. - * Original FD is not closed or altered in any other way. - * Preserves original FD value, if it's invalid (negative). - */ -static inline int dup_good_fd(int fd) -{ - if (fd < 0) - return fd; - return fcntl(fd, F_DUPFD_CLOEXEC, 3); -} - /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -571,7 +543,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = dup_good_fd(fd); + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); saved_errno = errno; close(old_fd); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8e7437006639..9c4db90b92b6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,8 +219,7 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd) + const char *str_sec, size_t str_len) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -230,7 +229,6 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; - LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -243,7 +241,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); + btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); free(raw_btf); return btf_fd; @@ -273,7 +271,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), 0); + strs, sizeof(strs)); } static int probe_map_create(enum bpf_map_type map_type) diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..a139334d57b6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,8 +2,5 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H -#define STRERR_BUFSIZE 128 - char *libbpf_strerror_r(int err, char *dst, int len); - #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 4ed46ed58a7b..9f766ddd946a 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,8 +30,6 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; - if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) - continue; if (!test__start_subtest(prog_type_name)) continue; @@ -70,8 +68,6 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; - if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) - continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 62ea855ec4d0..eb34d612d6f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,9 +132,6 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; - if (map_type == __MAX_BPF_MAP_TYPE) - continue; - map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); @@ -189,9 +186,6 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; - if (prog_type == __MAX_BPF_PROG_TYPE) - continue; - prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c deleted file mode 100644 index b5dce630e0e1..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ /dev/null @@ -1,1031 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#define _GNU_SOURCE -#include -#include -#include "cap_helpers.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "priv_map.skel.h" -#include "priv_prog.skel.h" -#include "dummy_st_ops_success.skel.h" - -static inline int sys_mount(const char *dev_name, const char *dir_name, - const char *type, unsigned long flags, - const void *data) -{ - return syscall(__NR_mount, dev_name, dir_name, type, flags, data); -} - -static inline int sys_fsopen(const char *fsname, unsigned flags) -{ - return syscall(__NR_fsopen, fsname, flags); -} - -static inline int sys_fspick(int dfd, const char *path, unsigned flags) -{ - return syscall(__NR_fspick, dfd, path, flags); -} - -static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) -{ - return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); -} - -static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) -{ - return syscall(__NR_fsmount, fs_fd, flags, ms_flags); -} - -static inline int sys_move_mount(int from_dfd, const char *from_path, - int to_dfd, const char *to_path, - unsigned flags) -{ - return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); -} - -static int drop_priv_caps(__u64 *old_caps) -{ - return cap_disable_effective((1ULL << CAP_BPF) | - (1ULL << CAP_PERFMON) | - (1ULL << CAP_NET_ADMIN) | - (1ULL << CAP_SYS_ADMIN), old_caps); -} - -static int restore_priv_caps(__u64 old_caps) -{ - return cap_enable_effective(old_caps, NULL); -} - -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) -{ - char buf[32]; - int err; - - if (!mask_str) { - if (mask == ~0ULL) { - mask_str = "any"; - } else { - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); - mask_str = buf; - } - } - - err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask_str, 0); - if (err < 0) - err = -errno; - return err; -} - -#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) - -struct bpffs_opts { - __u64 cmds; - __u64 maps; - __u64 progs; - __u64 attachs; - const char *cmds_str; - const char *maps_str; - const char *progs_str; - const char *attachs_str; -}; - -static int create_bpffs_fd(void) -{ - int fs_fd; - - /* create VFS context */ - fs_fd = sys_fsopen("bpf", 0); - ASSERT_GE(fs_fd, 0, "fs_fd"); - - return fs_fd; -} - -static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) -{ - int mnt_fd, err; - - /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); - if (!ASSERT_OK(err, "fs_cfg_cmds")) - return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); - if (!ASSERT_OK(err, "fs_cfg_maps")) - return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); - if (!ASSERT_OK(err, "fs_cfg_progs")) - return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); - if (!ASSERT_OK(err, "fs_cfg_attachs")) - return err; - - /* instantiate FS object */ - err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); - if (err < 0) - return -errno; - - /* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; -} - -/* send FD over Unix domain (AF_UNIX) socket */ -static int sendfd(int sockfd, int fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1] = { fd }, err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); - memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); - - err = sendmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "sendmsg")) - return -EINVAL; - - return 0; -} - -/* receive FD over Unix domain (AF_UNIX) socket */ -static int recvfd(int sockfd, int *fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1], err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - err = recvmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "recvmsg")) - return -EINVAL; - - cmsg = CMSG_FIRSTHDR(&msg); - if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || - !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || - !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || - !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) - return -EINVAL; - - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - *fd = fds[0]; - - return 0; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static int write_file(const char *path, const void *buf, size_t count) -{ - int fd; - ssize_t ret; - - fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); - if (fd < 0) - return -1; - - ret = write_nointr(fd, buf, count); - close(fd); - if (ret < 0 || (size_t)ret != count) - return -1; - - return 0; -} - -static int create_and_enter_userns(void) -{ - uid_t uid; - gid_t gid; - char map[100]; - - uid = getuid(); - gid = getgid(); - - if (unshare(CLONE_NEWUSER)) - return -1; - - if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && - errno != ENOENT) - return -1; - - snprintf(map, sizeof(map), "0 %d 1", uid); - if (write_file("/proc/self/uid_map", map, strlen(map))) - return -1; - - - snprintf(map, sizeof(map), "0 %d 1", gid); - if (write_file("/proc/self/gid_map", map, strlen(map))) - return -1; - - if (setgid(0)) - return -1; - - if (setuid(0)) - return -1; - - return 0; -} - -typedef int (*child_callback_fn)(int); - -static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; - - /* setup userns with root mappings */ - err = create_and_enter_userns(); - if (!ASSERT_OK(err, "create_and_enter_userns")) - goto cleanup; - - /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ - err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "create_mountns")) - goto cleanup; - - err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); - if (!ASSERT_OK(err, "remount_root")) - goto cleanup; - - fs_fd = create_bpffs_fd(); - if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, fs_fd); - if (!ASSERT_OK(err, "send_fs_fd")) - goto cleanup; - zclose(fs_fd); - - /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent - */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) - goto cleanup; - - /* try to fspick() BPF FS and try to add some delegation options */ - fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); - if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); - if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { - err = -EINVAL; - goto cleanup; - } - - /* do custom test logic with customly set up BPF FS instance */ - err = callback(bpffs_fd); - if (!ASSERT_OK(err, "test_callback")) - goto cleanup; - - err = 0; -cleanup: - zclose(sock_fd); - zclose(mnt_fd); - zclose(fs_fd); - zclose(bpffs_fd); - - exit(-err); -} - -static int wait_for_pid(pid_t pid) -{ - int status, ret; - -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == EINTR) - goto again; - - return -1; - } - - if (!WIFEXITED(status)) - return -1; - - return WEXITSTATUS(status); -} - -static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) -{ - int fs_fd = -1, mnt_fd = -1, err; - - err = recvfd(sock_fd, &fs_fd); - if (!ASSERT_OK(err, "recv_bpffs_fd")) - goto cleanup; - - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) - goto cleanup; - zclose(mnt_fd); - - err = wait_for_pid(child_pid); - ASSERT_OK(err, "waitpid_child"); - -cleanup: - zclose(sock_fd); - zclose(fs_fd); - zclose(mnt_fd); - - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) -{ - int sock_fds[2] = { -1, -1 }; - int child_pid = 0, err; - - err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); - if (!ASSERT_OK(err, "socketpair")) - goto cleanup; - - child_pid = fork(); - if (!ASSERT_GE(child_pid, 0, "fork")) - goto cleanup; - - if (child_pid == 0) { - zclose(sock_fds[0]); - return child(sock_fds[1], bpffs_opts, cb); - - } else { - zclose(sock_fds[1]); - return parent(child_pid, bpffs_opts, sock_fds[0]); - } - -cleanup: - zclose(sock_fds[0]); - zclose(sock_fds[1]); - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static int userns_map_create(int mnt_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int err, token_fd = -1, map_fd = -1; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no token, no CAP_BPF -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* token without CAP_BPF -> fail */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* CAP_BPF without token -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* finally, namespaced CAP_BPF + token -> success */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { - err = -EINVAL; - goto cleanup; - } - -cleanup: - zclose(token_fd); - zclose(map_fd); - return err; -} - -static int userns_btf_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); - int err, token_fd = -1, btf_fd = -1; - const void *raw_btf_data; - struct btf *btf = NULL; - __u32 raw_btf_size; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* setup a trivial BTF data to load to the kernel */ - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "empty_btf")) - goto cleanup; - - ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); - - raw_btf_data = btf__raw_data(btf, &raw_btf_size); - if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) - goto cleanup; - - /* no token + no CAP_BPF -> failure */ - btf_opts.token_fd = 0; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) - goto cleanup; - - /* token + no CAP_BPF -> failure */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) - goto cleanup; - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* token + CAP_BPF -> success */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) - goto cleanup; - - err = 0; -cleanup: - btf__free(btf); - zclose(btf_fd); - zclose(token_fd); - return err; -} - -static int userns_prog_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); - int err, token_fd = -1, prog_fd = -1; - struct bpf_insn insns[] = { - /* bpf_jiffies64() requires CAP_BPF */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), - /* bpf_get_current_task() requires CAP_PERFMON */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), - /* r0 = 0; exit; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - size_t insn_cnt = ARRAY_SIZE(insns); - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* validate we can successfully load BPF program with token; this - * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) - * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have - * BPF token wired properly in a bunch of places in the kernel - */ - prog_opts.token_fd = token_fd; - prog_opts.expected_attach_type = BPF_XDP; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { - err = -EPERM; - goto cleanup; - } - - /* no token + caps -> failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no caps + token -> failure */ - prog_opts.token_fd = token_fd; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - /* no caps + no token -> definitely a failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = 0; -cleanup: - zclose(prog_fd); - zclose(token_fd); - return err; -} - -static int userns_obj_priv_map(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_map *skel; - int err, token_fd; - - skel = priv_map__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_map__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_map__load(skel); - priv_map__destroy(skel); - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - /* create token and pass it through bpf_token_fd */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "create_token")) - return -EINVAL; - - opts.bpf_token_path = NULL; - opts.bpf_token_fd = token_fd; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) - return -EINVAL; - - /* we can close our token FD, bpf_object owns dup()'ed FD now */ - close(token_fd); - - err = priv_map__load(skel); - priv_map__destroy(skel); - if (!ASSERT_OK(err, "obj_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_prog(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_prog *skel; - int err; - - skel = priv_prog__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_prog__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_prog__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_prog__load(skel); - priv_prog__destroy(skel); - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - return 0; -} - -/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, - * which should cause struct_ops application to fail, as BTF won't be uploaded - * into the kernel, even if STRUCT_OPS programs themselves are allowed - */ -static int validate_struct_ops_load(int mnt_fd, bool expect_success) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct dummy_st_ops_success *skel; - int err; - - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (expect_success) { - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - } else /* expect failure */ { - if (!ASSERT_ERR(err, "obj_token_path_load")) - return -EINVAL; - } - - return 0; -} - -static int userns_obj_priv_btf_fail(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, false /* should fail */); -} - -static int userns_obj_priv_btf_success(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, true /* should succeed */); -} - -#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" - -static int userns_obj_priv_implicit_token(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF - * token automatically and implicitly - */ - err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - return -EINVAL; - - /* disable implicit BPF token creation by setting - * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail - */ - err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - return -EINVAL; - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { - unsetenv(TOKEN_ENVVAR); - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - unsetenv(TOKEN_ENVVAR); - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from /sys/fs/bpf mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - return -EINVAL; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, should fail */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - return -EINVAL; - - /* now disable implicit token through negative bpf_token_fd, should fail */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_implicit_token_envvar(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over custom location, so libbpf can't create - * BPF token implicitly, unless pointed to it through - * LIBBPF_BPF_TOKEN_PATH envvar - */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) - goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - goto err_out; - - /* even though we have BPF FS with delegation, it's not at default - * /sys/fs/bpf location, so we still fail to load until envvar is set up - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { - dummy_st_ops_success__destroy(skel); - goto err_out; - } - - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - goto err_out; - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from custom mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - goto err_out; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - goto err_out; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - goto err_out; - - /* now disable implicit token through negative bpf_token_fd, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - goto err_out; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - goto err_out; - - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return 0; -err_out: - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return -EINVAL; -} - -#define bit(n) (1ULL << (n)) - -void test_token(void) -{ - if (test__start_subtest("map_token")) { - struct bpffs_opts opts = { - .cmds_str = "map_create", - .maps_str = "stack", - }; - - subtest_userns(&opts, userns_map_create); - } - if (test__start_subtest("btf_token")) { - struct bpffs_opts opts = { - .cmds = 1ULL << BPF_BTF_LOAD, - }; - - subtest_userns(&opts, userns_btf_load); - } - if (test__start_subtest("prog_token")) { - struct bpffs_opts opts = { - .cmds_str = "PROG_LOAD", - .progs_str = "XDP", - .attachs_str = "xdp", - }; - - subtest_userns(&opts, userns_prog_load); - } - if (test__start_subtest("obj_priv_map")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_MAP_CREATE), - .maps = bit(BPF_MAP_TYPE_QUEUE), - }; - - subtest_userns(&opts, userns_obj_priv_map); - } - if (test__start_subtest("obj_priv_prog")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_PROG_LOAD), - .progs = bit(BPF_PROG_TYPE_KPROBE), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_prog); - } - if (test__start_subtest("obj_priv_btf_fail")) { - struct bpffs_opts opts = { - /* disallow BTF loading */ - .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_fail); - } - if (test__start_subtest("obj_priv_btf_success")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_success); - } - if (test__start_subtest("obj_priv_implicit_token")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token); - } - if (test__start_subtest("obj_priv_implicit_token_envvar")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); - } -} diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c deleted file mode 100644 index 9085be50f03b..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_map.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ - -#include "vmlinux.h" -#include - -char _license[] SEC("license") = "GPL"; - -struct { - __uint(type, BPF_MAP_TYPE_QUEUE); - __uint(max_entries, 1); - __type(value, __u32); -} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c deleted file mode 100644 index 3c7b2b618c8a..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_prog.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ - -#include "vmlinux.h" -#include - -char _license[] SEC("license") = "GPL"; - -SEC("kprobe") -int kprobe_prog(void *ctx) -{ - return 1; -} -- cgit v1.2.3 From b0fb904d074e810c22c26883b8ed4489c17d1292 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 19 Dec 2023 13:33:44 -0800 Subject: HID: wacom: Add additional tests of confidence behavior Test for proper driver behavior when the touch confidence bit is set or cleared. Test the three flavors of touch confidence loss (tipswitch cleared before confidence, tipswitch and confidence cleared at the same time, and tipswitch only cleared when touch is actually removed). Also test two flavors of touch confidence gain (confidence added to a touch that was "never" confident, and confidence added to a touch that was previously confident). Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- .../selftests/hid/tests/test_wacom_generic.py | 278 ++++++++++++++++++++- 1 file changed, 277 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index f92fe8e02c1b..a7ee54e5090b 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -27,6 +27,7 @@ from .descriptors_wacom import ( ) import attr +from collections import namedtuple from enum import Enum from hidtools.hut import HUT from hidtools.hid import HidUnit @@ -862,6 +863,8 @@ class TestPTHX60_Pen(TestOpaqueCTLTablet): class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest): + ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num") + def create_device(self): return test_multitouch.Digitizer( "DTH 2452", @@ -869,6 +872,57 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest input_info=(0x3, 0x056A, 0x0383), ) + def make_contact(self, contact_id=0, t=0): + """ + Make a single touch contact that can move over time. + + Creates a touch object that has a well-known position in space that + does not overlap with other contacts. The value of `t` may be + incremented over time to move the point along a linear path. + """ + x = 50 + 10 * contact_id + t + y = 100 + 100 * contact_id + t + return test_multitouch.Touch(contact_id, x, y) + + def make_contacts(self, n, t=0): + """ + Make multiple touch contacts that can move over time. + + Returns a list of `n` touch objects that are positioned at well-known + locations. The value of `t` may be incremented over time to move the + points along a linear path. + """ + return [ self.make_contact(id, t) for id in range(0, n) ] + + def assert_contact(self, uhdev, evdev, contact_ids, t=0): + """ + Assert properties of a contact generated by make_contact. + """ + contact_id = contact_ids.contact_id + tracking_id = contact_ids.tracking_id + slot_num = contact_ids.slot_num + + x = 50 + 10 * contact_id + t + y = 100 + 100 * contact_id + t + + # If the data isn't supposed to be stored in any slots, there is + # nothing we can check for in the evdev stream. + if slot_num is None: + assert tracking_id == -1 + return + + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == tracking_id + if tracking_id != -1: + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y + + def assert_contacts(self, uhdev, evdev, data, t=0): + """ + Assert properties of a list of contacts generated by make_contacts. + """ + for contact_ids in data: + self.assert_contact(uhdev, evdev, contact_ids, t) + def test_contact_id_0(self): """ Bring a finger in contact with the tablet, then hold it down and remove it. @@ -919,4 +973,226 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest slot = self.get_slot(uhdev, t0, 0) - assert not events \ No newline at end of file + assert not events + + def test_confidence_multitouch(self): + """ + Bring multiple fingers in contact with the tablet, some with the + confidence bit set, and some without. + + Ensure that all confident touches are reported and that all non- + confident touches are ignored. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + touches = self.make_contacts(5) + touches[0].confidence = False + touches[2].confidence = False + touches[4].confidence = False + + r = uhdev.event(touches) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events + + self.assert_contacts(uhdev, evdev, + [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), + self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), + self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None), + self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1), + self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ]) + + def confidence_change_assert_playback(self, uhdev, evdev, timeline): + """ + Assert proper behavior of contacts that move and change tipswitch / + confidence status over time. + + Given a `timeline` list of touch states to iterate over, verify + that the contacts move and are reported as up/down as expected + by the state of the tipswitch and confidence bits. + """ + t = 0 + + for state in timeline: + touches = self.make_contacts(len(state), t) + + for item in zip(touches, state): + item[0].tipswitch = item[1][1] + item[0].confidence = item[1][2] + + r = uhdev.event(touches) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + ids = [ x[0] for x in state ] + self.assert_contacts(uhdev, evdev, ids, t) + + t += 1 + + def test_confidence_loss_a(self): + """ + Transition a confident contact to a non-confident contact by + first clearing the tipswitch. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. This mode of confidence loss is used by the + DTH-2452. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the tipswitch flag + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_loss_b(self): + """ + Transition a confident contact to a non-confident contact by + cleraing both tipswitch and confidence bits simultaneously. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. This mode of confidence loss is used by some + AES devices. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and has both flags cleared simultaneously + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_loss_c(self): + """ + Transition a confident contact to a non-confident contact by + clearing only the confidence bit. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the confidence flag + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_gain_a(self): + """ + Transition a contact that was always non-confident to confident. + + Ensure that the confident contact is reported normally. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Only second finger is confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger gains confidence + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)] + ]) + + def test_confidence_gain_b(self): + """ + Transition a contact from non-confident to confident. + + Ensure that the confident contact is reported normally. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # First and second finger confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Firtst finger looses confidence + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger gains confidence + [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger goes up + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) -- cgit v1.2.3 From 1de584832375d0dc4234ee406185384a58fb96ac Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:47:58 +0800 Subject: selftests/bpf: remove reduplicated s32 casting in "crafted_cases" The "S32_MIN" is already defined with s32 casting, so there is no need to do it again. Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-3-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 0c9abd279e18..3bf4ddd720a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -2097,10 +2097,10 @@ static struct subtest_case crafted_cases[] = { {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, - {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, - {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, - {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, - {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3 From 31d9cc96b1e3b28daf74938cb1233231474bbcf6 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:47:59 +0800 Subject: selftests/bpf: activate the OP_NE logic in range_cond() The edge range checking for the registers is supported by the verifier now, so we can activate the extended logic in tools/testing/selftests/bpf/prog_tests/reg_bounds.c/range_cond() to test such logic. Besides, I added some cases to the "crafted_cases" array for this logic. These cases are mainly used to test the edge of the src reg and dst reg. All reg bounds testings has passed in the SLOW_TESTS mode: $ export SLOW_TESTS=1 && ./test_progs -t reg_bounds -j Summary: 65/18959832 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-4-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 3bf4ddd720a8..820d0bcfc474 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -590,12 +590,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); break; case OP_NE: - /* generic case, can't derive more information */ - *newx = range(t, x.a, x.b); - *newy = range(t, y.a, y.b); - break; - - /* below extended logic is not supported by verifier just yet */ + /* below logic is supported by the verifier now */ if (x.a == x.b && x.a == y.a) { /* X is a constant matching left side of Y */ *newx = range(t, x.a, x.b); @@ -2101,6 +2096,18 @@ static struct subtest_case crafted_cases[] = { {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, + + /* edge overlap testings for BPF_NE */ + {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}}, + {U64, U64, {0, U64_MAX}, {0, 0}}, + {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}}, + {S64, U64, {S64_MIN, 0}, {0, 0}}, + {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}}, + {U32, U32, {0, U32_MAX}, {0, 0}}, + {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + {S32, U32, {(u32)S32_MIN, 0}, {0, 0}}, + {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3 From 463ea64eb008b7abb63245ed69446b404bf042b1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:48:00 +0800 Subject: selftests/bpf: add testcase to verifier_bounds.c for BPF_JNE Add testcase for the logic that the verifier tracks the BPF_JNE for regs. The assembly function "reg_not_equal_const()" and "reg_equal_const" that we add is exactly converted from the following case: u32 a = bpf_get_prandom_u32(); u64 b = 0; a %= 8; /* the "a > 0" here will be optimized to "a != 0" */ if (a > 0) { /* now the range of a should be [1, 7] */ bpf_skb_store_bytes(skb, 0, &b, a, 0); } Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-5-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index ec430b71730b..960998f16306 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1075,4 +1075,66 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("tc") +__description("bounds check with JMP_NE for reg edge") +__success __retval(0) +__naked void reg_not_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 != 0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* The 4th argument of bpf_skb_store_bytes is defined as \ + * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \ + * is providing us this exclusion of zero from initial \ + * [0, 7] range. \ + */ \ + call %[bpf_skb_store_bytes]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("bounds check with JMP_EQ for reg edge") +__success __retval(0) +__naked void reg_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* Just the same as what we do in reg_not_equal_const() */ \ + call %[bpf_skb_store_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 5eccd2db42d77e3570619c32d39e39bf486607cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:26 -0800 Subject: bpf: reuse btf_prepare_func_args() check for main program BTF validation Instead of btf_check_subprog_arg_match(), use btf_prepare_func_args() logic to validate "trustworthiness" of main BPF program's BTF information, if it is present. We ignored results of original BTF check anyway, often times producing confusing and ominously-sounding "reg type unsupported for arg#0 function" message, which has no apparent effect on program correctness and verification process. All the -EFAULT returning sanity checks are already performed in check_btf_info_early(), so there is zero reason to have this duplication of logic between btf_check_subprog_call() and btf_check_subprog_arg_match(). Dropping btf_check_subprog_arg_match() simplifies btf_check_func_arg_match() further removing `bool processing_call` flag. One subtle bit that was done by btf_check_subprog_arg_match() was potentially marking main program's BTF as unreliable. We do this explicitly now with a dedicated simple check, preserving the original behavior, but now based on well factored btf_prepare_func_args() logic. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 - kernel/bpf/btf.c | 50 ++-------------------- kernel/bpf/verifier.c | 25 ++++++----- tools/testing/selftests/bpf/prog_tests/log_fixup.c | 4 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 2 +- .../selftests/bpf/progs/task_kfunc_failure.c | 2 +- 6 files changed, 19 insertions(+), 66 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c050c82cc9a5..d0d7eff22b8a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2466,8 +2466,6 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index be2104e5f2f5..33d9a1c73f6e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6768,8 +6768,7 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - bool ptr_to_mem_ok, - bool processing_call) + bool ptr_to_mem_ok) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_verifier_log *log = &env->log; @@ -6842,7 +6841,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, i, btf_type_str(t)); return -EINVAL; } - } else if (ptr_to_mem_ok && processing_call) { + } else if (ptr_to_mem_ok) { const struct btf_type *resolve_ret; u32 type_size; @@ -6867,55 +6866,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return 0; } -/* Compare BTF of a function declaration with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - */ -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) -{ - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - bool is_global; - u32 btf_id; - int err; - - if (!prog->aux->func_info) - return -EINVAL; - - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; - - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); - - /* Compiler optimizations can remove arguments from static functions - * or mismatched type can be passed into a global function. - * In such cases mark the function as unreliable from BTF point of view. - */ - if (err) - prog->aux->func_info_aux[subprog].unreliable = true; - return err; -} - /* Compare BTF of a function call with given bpf_reg_state. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - there is a type mismatch or BTF is not available. * 0 - BTF matches with what bpf_reg_state expects. * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - * - * NOTE: the code is duplicated from btf_check_subprog_arg_match() - * because btf_check_func_arg_match() is still doing both. Once that - * function is split in 2, we can call from here btf_check_subprog_arg_match() - * first, and then treat the calling part in a new code path. */ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs) @@ -6937,7 +6893,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6555785b9f63..c26e9ab5226c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19904,6 +19904,7 @@ static void free_states(struct bpf_verifier_env *env) static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_state *state; struct bpf_reg_state *regs; int ret, i; @@ -19930,9 +19931,9 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) state->first_insn_idx = env->subprog_info[subprog].start; state->last_insn_idx = -1; + regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { - struct bpf_subprog_info *sub = subprog_info(env, subprog); const char *sub_name = subprog_name(env, subprog); struct bpf_subprog_arg_info *arg; struct bpf_reg_state *reg; @@ -19979,21 +19980,19 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) } } } else { + /* if main BPF program has associated BTF info, validate that + * it's matching expected signature, and otherwise mark BTF + * info for main program as unreliable + */ + if (env->prog->aux->func_info_aux) { + ret = btf_prepare_func_args(env, 0); + if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) + env->prog->aux->func_info_aux[0].unreliable = true; + } + /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_subprog_arg_match(env, subprog, regs); - if (ret == -EFAULT) - /* unlikely verifier bug. abort. - * ret == 0 and ret < 0 are sadly acceptable for - * main() function due to backward compatibility. - * Like socket filter program may be written as: - * int bpf_prog(struct pt_regs *ctx) - * and never dereference that ctx in the program. - * 'struct pt_regs' is a type mismatch for socket - * filter that should be using 'struct __sk_buff'. - */ - goto out; } ret = do_check(env); diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index effd78b2a657..7a3fa2ff567b 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 0fa564a5cc5b..9fe9c4a4e8f6 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -78,7 +78,7 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) } SEC("kretprobe/cgroup_destroy_locked") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed") int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) { struct cgroup *acquired; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index dcdea3127086..ad88a3796ddf 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -248,7 +248,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl } SEC("lsm/task_free") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) { struct task_struct *acquired; -- cgit v1.2.3 From f18c3d88deedf0defc3e4800341cc7bcaaabcdf9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:29 -0800 Subject: bpf: reuse subprog argument parsing logic for subprog call checks Remove duplicated BTF parsing logic when it comes to subprog call check. Instead, use (potentially cached) results of btf_prepare_func_args() to abstract away expectations of each subprog argument in generic terms (e.g., "this is pointer to context", or "this is a pointer to memory of size X"), and then use those simple high-level argument type expectations to validate actual register states to check if they match expectations. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 110 ++++++--------------- .../selftests/bpf/progs/test_global_func5.c | 2 +- 2 files changed, 31 insertions(+), 81 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f48e49f2d482..3c9e6d0d77d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9249,101 +9249,54 @@ err_out: return err; } -static int btf_check_func_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - bool ptr_to_mem_ok) +static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, + const struct btf *btf, + struct bpf_reg_state *regs) { - enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_log *log = &env->log; - const char *func_name, *ref_tname; - const struct btf_type *t, *ref_t; - const struct btf_param *args; - u32 i, nargs, ref_id; + u32 i; int ret; - t = btf_type_by_id(btf, func_id); - if (!t || !btf_type_is_func(t)) { - /* These checks were already done by the verifier while loading - * struct bpf_func_info or in add_kfunc_call(). - */ - bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", - func_id); - return -EFAULT; - } - func_name = btf_name_by_offset(btf, t->name_off); - - t = btf_type_by_id(btf, t->type); - if (!t || !btf_type_is_func_proto(t)) { - bpf_log(log, "Invalid BTF of func %s\n", func_name); - return -EFAULT; - } - args = (const struct btf_param *)(t + 1); - nargs = btf_type_vlen(t); - if (nargs > MAX_BPF_FUNC_REG_ARGS) { - bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, - MAX_BPF_FUNC_REG_ARGS); - return -EINVAL; - } + ret = btf_prepare_func_args(env, subprog); + if (ret) + return ret; /* check that BTF function arguments match actual types that the * verifier sees. */ - for (i = 0; i < nargs; i++) { - enum bpf_arg_type arg_type = ARG_DONTCARE; + for (i = 0; i < sub->arg_cnt; i++) { u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; + struct bpf_subprog_arg_info *arg = &sub->args[i]; - t = btf_type_skip_modifiers(btf, args[i].type, NULL); - if (btf_type_is_scalar(t)) { - if (reg->type == SCALAR_VALUE) - continue; - bpf_log(log, "R%d is not a scalar\n", regno); - return -EINVAL; - } - - if (!btf_type_is_ptr(t)) { - bpf_log(log, "Unrecognized arg#%d type %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - - ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); - ref_tname = btf_name_by_offset(btf, ref_t->name_off); - - ret = check_func_arg_reg_off(env, reg, regno, arg_type); - if (ret < 0) - return ret; - - if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + if (arg->arg_type == ARG_ANYTHING) { + if (reg->type != SCALAR_VALUE) { + bpf_log(log, "R%d is not a scalar\n", regno); + return -EINVAL; + } + } else if (arg->arg_type == ARG_PTR_TO_CTX) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. */ if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - } else if (ptr_to_mem_ok) { - const struct btf_type *resolve_ret; - u32 type_size; - - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); - if (IS_ERR(resolve_ret)) { - bpf_log(log, - "arg#%d reference type('%s %s') size cannot be determined: %ld\n", - i, btf_type_str(ref_t), ref_tname, - PTR_ERR(resolve_ret)); + bpf_log(log, "arg#%d expects pointer to ctx\n", i); return -EINVAL; } + } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; - if (check_mem_reg(env, reg, regno, type_size)) + if (check_mem_reg(env, reg, regno, arg->mem_size)) return -EINVAL; } else { - bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, - func_name, func_id); - return -EINVAL; + bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", + i, arg->arg_type); + return -EFAULT; } } @@ -9358,11 +9311,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. */ static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs) { struct bpf_prog *prog = env->prog; struct btf *btf = prog->aux->btf; - bool is_global; u32 btf_id; int err; @@ -9376,9 +9328,7 @@ static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, if (prog->aux->func_info_aux[subprog].unreliable) return -EINVAL; - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); - + err = btf_check_func_arg_match(env, subprog, btf, regs); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. * In such cases mark the function as unreliable from BTF point of view. diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index cc55aedaf82d..257c0569ff98 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -26,7 +26,7 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -__failure __msg("expected pointer to ctx, but got PTR") +__failure __msg("expects pointer to ctx") int global_func5(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); -- cgit v1.2.3 From aae9c25dda159045b223ecb471cd0729ccec8285 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:32 -0800 Subject: libbpf: add __arg_xxx macros for annotating global func args Add a set of __arg_xxx macros which can be used to augment BPF global subprogs/functions with extra information for use by BPF verifier. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 77ceea575dc7..2324cc42b017 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -188,6 +188,9 @@ enum libbpf_tristate { !!sym; \ }) +#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) +#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif -- cgit v1.2.3 From 0a0ffcac92d5b41133c97d260ad1f320572783a5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:33 -0800 Subject: selftests/bpf: add global subprog annotation tests Add test cases to validate semantics of global subprog argument annotations: - non-null pointers; - context argument; - const dynptr passing; - packet pointers (data, metadata, end). Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_subprogs.c | 99 +++++++++++++++++++++- 1 file changed, 95 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index bd696a431244..9eeb2d89cda8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#include -#include -#include -#include +#include #include #include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" int arr[1]; int unkn_idx; @@ -98,4 +97,96 @@ int unguarded_unsupp_global_called(void) return global_unsupp(&x); } +long stack[128]; + +__weak int subprog_nullable_ptr_bad(int *p) +{ + return (*p) * 2; /* bad, missing null check */ +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("invalid mem access 'mem_or_null'") +int arg_tag_nullable_ptr_fail(void *ctx) +{ + int x = 42; + + return subprog_nullable_ptr_bad(&x); +} + +__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) +{ + return (*p1) * (*p2); /* good, no need for NULL checks */ +} + +int x = 47; + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_nonnull_ptr_good(void *ctx) +{ + int y = 74; + + return subprog_nonnull_ptr_good(&x, &y); +} + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?tp") +__success __log_level(2) +int arg_tag_ctx_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +__weak int subprog_dynptr(struct bpf_dynptr *dptr) +{ + long *d, t, buf[1] = {}; + + d = bpf_dynptr_data(dptr, 0, sizeof(long)); + if (!d) + return 0; + + t = *d + 1; + + d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long)); + if (!d) + return t; + + t = *d + 2; + + return t; +} + +SEC("?xdp") +__success __log_level(2) +int arg_tag_dynptr(struct xdp_md *ctx) +{ + struct bpf_dynptr dptr; + + bpf_dynptr_from_xdp(ctx, 0, &dptr); + + return subprog_dynptr(&dptr); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From f0a5056222f2cfa6d40b4c888cb6b01e8569e282 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:34 -0800 Subject: selftests/bpf: add freplace of BTF-unreliable main prog test Add a test validating that freplace'ing another main (entry) BPF program fails if the target BPF program doesn't have valid/expected func proto BTF. We extend fexit_bpf2bpf test to allow to specify expected log message for negative test cases (where freplace program is expected to fail to load). Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 30 +++++++++++++++++++--- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 ++ .../selftests/bpf/progs/freplace_unreliable_prog.c | 20 +++++++++++++++ .../bpf/progs/verifier_btf_unreliable_prog.c | 20 +++++++++++++++ 4 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c create mode 100644 tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 8ec73fdfcdab..f29fc789c14b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -348,7 +348,8 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const char *obj_file, - const char *target_obj_file) + const char *target_obj_file, + const char *exp_msg) { /* * standalone test that asserts failure to load freplace prog @@ -356,6 +357,7 @@ static void test_obj_load_failure_common(const char *obj_file, */ struct bpf_object *obj = NULL, *pkt_obj; struct bpf_program *prog; + char log_buf[64 * 1024]; int err, pkt_fd; __u32 duration = 0; @@ -374,11 +376,21 @@ static void test_obj_load_failure_common(const char *obj_file, err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); + log_buf[0] = '\0'; + if (exp_msg) + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + if (env.verbosity > VERBOSE_NONE) + bpf_program__set_log_level(prog, 2); + /* It should fail to load the program */ err = bpf_object__load(obj); + if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */ + printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) goto close_prog; + if (exp_msg) + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg"); close_prog: bpf_object__close(obj); bpf_object__close(pkt_obj); @@ -388,14 +400,24 @@ static void test_func_replace_return_code(void) { /* test invalid return code in the replaced program */ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o", - "./connect4_prog.bpf.o"); + "./connect4_prog.bpf.o", NULL); } static void test_func_map_prog_compatibility(void) { /* test with spin lock map value in the replaced program */ test_obj_load_failure_common("./freplace_attach_probe.bpf.o", - "./test_attach_probe.bpf.o"); + "./test_attach_probe.bpf.o", NULL); +} + +static void test_func_replace_unreliable(void) +{ + /* freplace'ing unreliable main prog should fail with error + * "Cannot replace static functions" + */ + test_obj_load_failure_common("freplace_unreliable_prog.bpf.o", + "./verifier_btf_unreliable_prog.bpf.o", + "Cannot replace static functions"); } static void test_func_replace_global_func(void) @@ -563,6 +585,8 @@ void serial_test_fexit_bpf2bpf(void) test_func_replace_return_code(); if (test__start_subtest("func_map_prog_compatibility")) test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_unreliable")) + test_func_replace_unreliable(); if (test__start_subtest("func_replace_multi")) test_func_replace_multi(); if (test__start_subtest("fmod_ret_freplace")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index ac49ec25211d..d62c5bf00e71 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_bpf_get_stack.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" +#include "verifier_btf_unreliable_prog.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -125,6 +126,7 @@ void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_u void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } +void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c new file mode 100644 index 000000000000..624078abf3de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include +#include + +SEC("freplace/btf_unreliable_kprobe") +/* context type is what BPF verifier expects for kprobe context, but target + * program has `stuct whatever *ctx` argument, so freplace operation will be + * rejected with the following message: + * + * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *) + */ +int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c new file mode 100644 index 000000000000..36e033a2e02c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_misc.h" + +struct whatever {}; + +SEC("kprobe") +__success __log_level(2) +/* context type is wrong, making it impossible to freplace this program */ +int btf_unreliable_kprobe(struct whatever *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 441c725ed592cb22f2a82f2827dccd045356cc81 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 19 Dec 2023 21:57:27 +0800 Subject: selftests/bpf: Close cgrp fd before calling cleanup_cgroup_environment() There is error log when htab-mem benchmark completes. The error log looks as follows: $ ./bench htab-mem -d1 Setting up benchmark 'htab-mem'... Benchmark 'htab-mem' started. ...... (cgroup_helpers.c:353: errno: Device or resource busy) umount cgroup2 Fix it by closing cgrp fd before invoking cleanup_cgroup_environment(). Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231219135727.2661527-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/benchs/bench_htab_mem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 9146d3f414d2..926ee822143e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -335,6 +335,7 @@ static void htab_mem_report_final(struct bench_res res[], int res_cnt) " peak memory usage %7.2lfMiB\n", loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + close(ctx.fd); cleanup_cgroup_environment(); } -- cgit v1.2.3 From bd2dcb94c81e3408731d129e884954c36ef2f1fa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 Dec 2023 10:32:43 +0200 Subject: selftests: bridge_mdb: Add MDB bulk deletion test Add test cases to verify the behavior of the MDB bulk deletion functionality in the bridge driver. Signed-off-by: Ido Schimmel Acked-by: Petr Machata Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/bridge_mdb.sh | 191 ++++++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index e4e3e9405056..61348f71728c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -803,11 +803,198 @@ cfg_test_dump() cfg_test_dump_common "L2" l2_grps_get } +# Check flush functionality with different parameters. +cfg_test_flush() +{ + local num_entries + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different port. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10 + + # Different VLAN ID. + bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20 + + # Different routing protocol. + bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp + bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra + + # Different state. + bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent + bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp + + bridge mdb flush dev br0 + num_entries=$(bridge mdb show dev br0 | wc -l) + [[ $num_entries -eq 0 ]] + check_err $? 0 "Not all entries flushed after flush all" + + # Check that when flushing by port only entries programmed with the + # specified port are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 port $swp1 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong port" + + bridge mdb flush dev br0 port br0 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_fail $? "Host entry not flushed by specified port" + + bridge mdb flush dev br0 + + # Check that when flushing by VLAN ID only entries programmed with the + # specified VLAN ID are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20 + + bridge mdb flush dev br0 vid 10 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "Entry not flushed by specified VLAN ID" + bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null + check_err $? "Entry flushed by wrong VLAN ID" + + bridge mdb flush dev br0 + + # Check that all permanent entries are flushed when "permanent" is + # specified and that temporary entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by \"permanent\" state" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 + + # Check that all temporary entries are flushed when "nopermanent" is + # specified and that permanent entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_err $? "Entry flushed by wrong state (\"nopermanent\")" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_fail $? "Entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that L2 host entries are not flushed when "nopermanent" is + # specified, but flushed when "permanent" is specified. + + bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")" + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_fail $? "L2 host entry not flushed by \"permanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv4 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv6 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 proto bgp + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong routing protocol" + + bridge mdb flush dev br0 + + # Test that an error is returned when trying to flush using unsupported + # parameters. + + bridge mdb flush dev br0 src_vni 10 &> /dev/null + check_fail $? "Managed to flush by source VNI" + + bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null + check_fail $? "Managed to flush by destination IP" + + bridge mdb flush dev br0 dst_port 4789 &> /dev/null + check_fail $? "Managed to flush by UDP destination port" + + bridge mdb flush dev br0 vni 10 &> /dev/null + check_fail $? "Managed to flush by destination VNI" + + log_test "Flush tests" +} + cfg_test() { cfg_test_host cfg_test_port cfg_test_dump + cfg_test_flush } __fwd_test_host_ip() @@ -1166,8 +1353,8 @@ ctrl_test() ctrl_mldv2_is_in_test } -if ! bridge mdb help 2>&1 | grep -q "get"; then - echo "SKIP: iproute2 too old, missing bridge mdb get support" +if ! bridge mdb help 2>&1 | grep -q "flush"; then + echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip fi -- cgit v1.2.3 From c3e87a7fcd0bb5820ca6db9b385bbfacb556d083 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 Dec 2023 10:32:44 +0200 Subject: selftests: vxlan_mdb: Add MDB bulk deletion test Add test cases to verify the behavior of the MDB bulk deletion functionality in the VXLAN driver. Signed-off-by: Ido Schimmel Acked-by: Petr Machata Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/test_vxlan_mdb.sh | 201 +++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6725fd9157b9..84a05a9e46d8 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -79,6 +79,7 @@ CONTROL_PATH_TESTS=" dump_ipv6_ipv4 dump_ipv4_ipv6 dump_ipv6_ipv6 + flush " DATA_PATH_TESTS=" @@ -968,6 +969,202 @@ dump_ipv6_ipv6() dump_common $ns1 $local_addr $remote_prefix $fn } +flush() +{ + local num_entries + + echo + echo "Control path: Flush" + echo "-------------------" + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different source VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011" + + # Different routing protocol. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010" + + # Different destination IP. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010" + + # Different destination port. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010" + + # Different VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) + [[ $num_entries -eq 0 ]] + log_test $? 0 "Flush all" + + # Check that entries are flushed when port is specified as the VXLAN + # device and that an error is returned when port is specified as a + # different net device. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" + log_test $? 255 "Flush by wrong port" + + # Check that when flushing by source VNI only entries programmed with + # the specified source VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by specified source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" + log_test $? 0 "Flush by unspecified source VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that all entries are flushed when "permanent" is specified and + # that an error is returned when "nopermanent" is specified. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by \"permanent\" state" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent" + log_test $? 255 "Flush by \"nopermanent\" state" + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 1 "Flush by specified routing protocol" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" + log_test $? 0 "Flush by unspecified routing protocol" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination IP only entries programmed + # with the specified destination IP are flushed and the rest are not. + + # IPv4. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 1 "Flush by specified destination IP - IPv4" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 0 "Flush by unspecified destination IP - IPv4" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # IPv6. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" + log_test $? 1 "Flush by specified destination IP - IPv6" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" + log_test $? 0 "Flush by unspecified destination IP - IPv6" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by UDP destination port only entries + # programmed with the specified port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" + log_test $? 1 "Flush by specified UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a UDP destination port for an entry, traffic is + # encapsulated with the device's UDP destination port. Check that when + # flushing by the device's UDP destination port only entries programmed + # with this port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by device's UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination VNI only entries programmed + # with the specified destination VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" + log_test $? 1 "Flush by specified destination VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a destination VNI for an entry, traffic is + # encapsulated with the source VNI. Check that when flushing by a + # destination VNI that is equal to the source VNI only such entries are + # flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by destination VNI equal to source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Test that an error is returned when trying to flush using VLAN ID. + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10" + log_test $? 255 "Flush by VLAN ID" +} + ################################################################################ # Tests - Data path @@ -2292,9 +2489,9 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi -bridge mdb help 2>&1 | grep -q "get" +bridge mdb help 2>&1 | grep -q "flush" if [ $? -ne 0 ]; then - echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support" + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support" exit $ksft_skip fi -- cgit v1.2.3 From e43c64c971e48d11ee100c5a8b2eadbed056f924 Mon Sep 17 00:00:00 2001 From: Veronika Molnarova Date: Tue, 12 Dec 2023 17:59:09 +0100 Subject: perf archive: Add new option '--unpack' to expand tarballs Archives generated by the command 'perf archive' have to be unpacked manually. Following the addition of option '--all' now there also exist a nested structure of tars, and after further discussion with Red Hat Global Support Services, they found a feature correctly unpacking archives of 'perf archive' convenient. Option '--unpack' of 'perf archive' unpacks archives generated by the command 'perf archive' as well as archives generated when used with option '--all'. The 'perf.data' file is placed in the current directory, while debug symbols are unpacked in '~/.debug' directory. A tar filename can be passed as an argument, and if not provided the command tries to find a viable perf.tar file for unpacking. Signed-off-by: Veronika Molnarova Tested-by: Arnaldo Carvalho de Melo Cc: Michael Petlan Link: https://lore.kernel.org/r/20231212165909.14459-2-vmolnaro@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 66 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index a92042eae95a..f94795794b36 100755 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -7,17 +7,72 @@ PERF_DATA=perf.data PERF_SYMBOLS=perf.symbols PERF_ALL=perf.all ALL=0 +UNPACK=0 while [ $# -gt 0 ] ; do if [ $1 == "--all" ]; then ALL=1 shift + elif [ $1 == "--unpack" ]; then + UNPACK=1 + shift else PERF_DATA=$1 + UNPACK_TAR=$1 shift fi done +if [ $UNPACK -eq 1 ]; then + if [ ! -z "$UNPACK_TAR" ]; then # tar given as an argument + if [ ! -e "$UNPACK_TAR" ]; then + echo "Provided file $UNPACK_TAR does not exist" + exit 1 + fi + TARGET="$UNPACK_TAR" + else # search for perf tar in the current directory + TARGET=`find . -regex "\./perf.*\.tar\.bz2"` + TARGET_NUM=`echo -n "$TARGET" | grep -c '^'` + + if [ -z "$TARGET" -o $TARGET_NUM -gt 1 ]; then + echo -e "Error: $TARGET_NUM files found for unpacking:\n$TARGET" + echo "Provide the requested file as an argument" + exit 1 + else + echo "Found target file for unpacking: $TARGET" + fi + fi + + if [[ "$TARGET" =~ (\./)?$PERF_ALL.*.tar.bz2 ]]; then # perf tar generated by --all option + TAR_CONTENTS=`tar tvf "$TARGET" | tr -s " " | cut -d " " -f 6` + VALID_TAR=`echo "$TAR_CONTENTS" | grep "$PERF_SYMBOLS.tar.bz2" | wc -l` # check if it contains a sub-tar perf.symbols + if [ $VALID_TAR -ne 1 ]; then + echo "Error: $TARGET file is not valid (contains zero or multiple sub-tar files with debug symbols)" + exit 1 + fi + + INTERSECT=`comm -12 <(ls) <(echo "$TAR_CONTENTS") | tr "\n" " "` # check for overwriting + if [ ! -z "$INTERSECT" ]; then # prompt if file(s) already exist in the current directory + echo "File(s) ${INTERSECT::-1} already exist in the current directory." + while true; do + read -p 'Do you wish to overwrite them? ' yn + case $yn in + [Yy]* ) break;; + [Nn]* ) exit 1;; + * ) echo "Please answer yes or no.";; + esac + done + fi + + # unzip the perf.data file in the current working directory and debug symbols in ~/.debug directory + tar xvf $TARGET && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug + + else # perf tar generated by perf archive (contains only debug symbols) + tar xvf $TARGET -C ~/.debug + fi + exit 0 +fi + # # PERF_BUILDID_DIR environment variable set by perf # path to buildid directory, default to $HOME/.debug @@ -55,17 +110,12 @@ if [ $ALL -eq 1 ]; then # pack perf.data file together with tar containing tar cjf $PERF_SYMBOLS.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST tar cjf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 $PERF_DATA $PERF_SYMBOLS.tar.bz2 rm $PERF_SYMBOLS.tar.bz2 $MANIFEST $BUILDIDS || true - - echo -e "Now please run:\n" - echo -e "$ tar xvf $PERF_ALL-$HOSTNAME-$DATE.tar.bz2 && tar xvf $PERF_SYMBOLS.tar.bz2 -C ~/.debug\n" - echo "wherever you need to run 'perf report' on." else # pack only the debug symbols tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST rm $MANIFEST $BUILDIDS || true - - echo -e "Now please run:\n" - echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" - echo "wherever you need to run 'perf report' on." fi +echo -e "Now please run:\n" +echo -e "$ perf archive --unpack\n" +echo "or unpack the tar manually wherever you need to run 'perf report' on." exit 0 -- cgit v1.2.3 From 0647537df442e0ec818fc0bca347f13c11268202 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Dec 2023 13:30:15 +0000 Subject: tools/counter: Fix spelling mistake "componend" -> "component" There are two spelling mistakes in the help text. Fix them. Signed-off-by: Colin Ian King Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20231219133015.365943-1-colin.i.king@gmail.com Signed-off-by: William Breathitt Gray --- tools/counter/counter_watch_events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/counter/counter_watch_events.c b/tools/counter/counter_watch_events.c index 3898fe7e35ec..37d1b4b3e63d 100644 --- a/tools/counter/counter_watch_events.c +++ b/tools/counter/counter_watch_events.c @@ -120,8 +120,8 @@ static void print_usage(void) " evt_capture (COUNTER_EVENT_CAPTURE)\n" "\n" " chan= channel for this watch [default: 0]\n" - " id= componend id for this watch [default: 0]\n" - " parent= componend parent for this watch [default: 0]\n" + " id= component id for this watch [default: 0]\n" + " parent= component parent for this watch [default: 0]\n" "\n" "Example with two watched events:\n\n" "counter_watch_events -d \\\n" -- cgit v1.2.3 From b7760cf94d4f2665bf40d08dd69aa5d0b4aa593f Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 20 Dec 2023 08:51:43 +0800 Subject: tools/counter: Remove unneeded semicolon ./tools/counter/counter_watch_events.c:233:3-4: Unneeded semicolon ./tools/counter/counter_watch_events.c:234:2-3: Unneeded semicolon ./tools/counter/counter_watch_events.c:333:2-3: Unneeded semicolon Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7782 Signed-off-by: Yang Li Reviewed-by: Fabrice Gasnier Link: https://lore.kernel.org/r/20231220005143.84987-1-yang.lee@linux.alibaba.com Signed-off-by: William Breathitt Gray --- tools/counter/counter_watch_events.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/counter/counter_watch_events.c b/tools/counter/counter_watch_events.c index 37d1b4b3e63d..107631e0f2e3 100644 --- a/tools/counter/counter_watch_events.c +++ b/tools/counter/counter_watch_events.c @@ -230,8 +230,8 @@ int main(int argc, char **argv) break; default: return EXIT_FAILURE; - }; - }; + } + } if (nwatch) { watches = calloc(nwatch, sizeof(*watches)); @@ -330,7 +330,7 @@ int main(int argc, char **argv) i++; break; } - }; + } if (debug) print_watch(watches, nwatch); -- cgit v1.2.3 From c344675ad267040b1794b0b5d85147a5023c548d Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 14 Dec 2023 20:33:03 +0800 Subject: perf scripts python arm-cs-trace-disasm.py: Set start vm addr of exectable file to 0 For exectable ELF file, which e_type is ET_EXEC, dso start address is a absolute address other than offset. Just set vm_start to zero when dso start is 0x400000, which means it is a exectable file. Reviewed-by: James Clark Signed-off-by: Ruidong Tian Cc: Adrian Hunter Cc: Al Grant Cc: Alexander Shishkin Cc: Leo Yan Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki Poulouse Cc: Tor Jeremiassen Link: https://lore.kernel.org/r/20231214123304.34087-3-tianruidong@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index de58991c78bb..e33aee5c78d7 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -258,8 +258,9 @@ def process_event(param_dict): if (options.objdump_name != None): # It doesn't need to decrease virtual memory offset for disassembly - # for kernel dso, so in this case we set vm_start to zero. - if (dso == "[kernel.kallsyms]"): + # for kernel dso and executable file dso, so in this case we set + # vm_start to zero. + if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 else: dso_vm_start = int(dso_start) -- cgit v1.2.3 From 2d98dbb4c9c5b09c8d2411581ab17921f872dbd3 Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 14 Dec 2023 20:33:04 +0800 Subject: perf scripts python arm-cs-trace-disasm.py: Do not ignore disam first sample arm-cs-trace-disasm ignore disam the first branch sample, For example as follow, the instructions beteween 0x0000ffffae878750 and 0x0000ffffae878754 is lose: ARM CoreSight Trace Data Assembler Dump Event type: branches:uH Sample = { cpu: 0000 addr: 0x0000ffffae878750 phys_addr: 0x0000000000000000 ip: 0x0000000000000000 pid: 4003489 tid: 4003489 period: 1 time: 26765151766034 } Event type: branches:uH Sample = { cpu: 0000 addr: 0x0000000000000000 phys_addr: 0x0000000000000000 ip: 0x0000ffffae878754 pid: 4003489 tid: 4003489 period: 1 time: 26765151766034 } Initialize cpu_data earlier to fix it: ARM CoreSight Trace Data Assembler Dump Event type: branches:uH Sample = { cpu: 0000 addr: 0x0000000000000000 phys_addr: 0x0000000000000000 ip: 0x0000ffffae878754 pid: 4003489 tid: 4003489 period: 1 time: 26765151766034 } 0000000000028740 : (base address is 0x0000ffffae850000) 28750: b13ffc1f cmn x0, #4095 28754: 54000042 b.hs 0x2875c test 4003489/4003489 [0000] 26765.151766034 __GI___ioctl+0x14 /usr/lib64/libc-2.32.so Event type: branches:uH Sample = { cpu: 0000 addr: 0x0000ffffa67535ac phys_addr: 0x0000000000000000 ip: 0x0000000000000000 pid: 4003489 tid: 4003489 period: 1 time: 26765151766034 } Reviewed-by: James Clark Signed-off-by: Ruidong Tian Cc: Adrian Hunter Cc: Al Grant Cc: Alexander Shishkin Cc: Leo Yan Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki Poulouse Cc: Tor Jeremiassen Link: https://lore.kernel.org/r/20231214123304.34087-4-tianruidong@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/arm-cs-trace-disasm.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index e33aee5c78d7..d973c2baed1c 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -188,6 +188,17 @@ def process_event(param_dict): dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + cpu = sample["cpu"] + ip = sample["ip"] + addr = sample["addr"] + + # Initialize CPU data if it's empty, and directly return back + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = addr + return + + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -209,16 +220,6 @@ def process_event(param_dict): if (name[0:8] != "branches"): return - cpu = sample["cpu"] - ip = sample["ip"] - addr = sample["addr"] - - # Initialize CPU data if it's empty, and directly return back - # if this is the first tracing event for this CPU. - if (cpu_data.get(str(cpu) + 'addr') == None): - cpu_data[str(cpu) + 'addr'] = addr - return - # The format for packet is: # # +------------+------------+------------+ -- cgit v1.2.3 From 16f533ade706d33e60324ff32e526bda20bccbd9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:45 -0800 Subject: perf unwind: Use function to add missing maps lock Switch read_unwind_spec_eh_frame() from loop macro maps__for_each_entry() to maps__for_each_map() function that takes a callback. The function holds the maps lock, which should be held during iteration. Committer notes: Fixed up conflict with: 4fb54994b2360ab5 ("perf unwind-libunwind: Fix base address for .eh_frame") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Steinar H. Gunderson Cc: Wenyu Liu Cc: Yang Jihong Cc: changbin du Cc: colin ian king Cc: dmitrii dolgov <9erthalion6@gmail.com> Cc: guilherme amadio Cc: huacai chen Cc: k prateek nayak Cc: li dong Cc: liam howlett Cc: miguel ojeda Cc: ming wang Cc: sean christopherson Cc: vincent whitchurch Link: http://lore.kernel.org/lkml/20231207011722.1220634-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/unwind-libunwind-local.c | 34 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 5e5c3395a499..dac536e28360 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -302,12 +302,31 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, return 0; } +struct read_unwind_spec_eh_frame_maps_cb_args { + struct dso *dso; + u64 base_addr; +}; + +static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data) +{ + + struct read_unwind_spec_eh_frame_maps_cb_args *args = data; + + if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr) + args->base_addr = map__start(map) - map__pgoff(map); + + return 0; +} + + static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, u64 *table_data, u64 *segbase, u64 *fde_count) { - struct map_rb_node *map_node; - u64 base_addr = UINT64_MAX; + struct read_unwind_spec_eh_frame_maps_cb_args args = { + .dso = dso, + .base_addr = UINT64_MAX, + }; int ret, fd; if (dso->data.eh_frame_hdr_offset == 0) { @@ -325,16 +344,11 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui, return -EINVAL; } - maps__for_each_entry(thread__maps(ui->thread), map_node) { - struct map *map = map_node->map; - u64 start = map__start(map) - map__pgoff(map); + maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args); - if (map__dso(map) == dso && start < base_addr) - base_addr = start; - } - base_addr -= dso->data.elf_base_addr; + args.base_addr -= dso->data.elf_base_addr; /* Address of .eh_frame_hdr */ - *segbase = base_addr + dso->data.eh_frame_hdr_addr; + *segbase = args.base_addr + dso->data.eh_frame_hdr_addr; ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset, table_data, fde_count); if (ret) -- cgit v1.2.3 From 51ab715e2bf037cd0525494b7ed496c46e4013c6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:46 -0800 Subject: perf vdso: Use function to add missing maps lock Switch machine__thread_dso_type() from loop macro maps__for_each_entry() to maps__for_each_map() function that takes a callback. The function holds the maps lock, which should be held during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/vdso.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index ae3eee69b659..df8963796187 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -140,23 +140,34 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s return dso; } +struct machine__thread_dso_type_maps_cb_args { + struct machine *machine; + enum dso_type dso_type; +}; + +static int machine__thread_dso_type_maps_cb(struct map *map, void *data) +{ + struct machine__thread_dso_type_maps_cb_args *args = data; + struct dso *dso = map__dso(map); + + if (!dso || dso->long_name[0] != '/') + return 0; + + args->dso_type = dso__type(dso, args->machine); + return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0; +} + static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { - enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map_rb_node *rb_node; - - maps__for_each_entry(thread__maps(thread), rb_node) { - struct dso *dso = map__dso(rb_node->map); + struct machine__thread_dso_type_maps_cb_args args = { + .machine = machine, + .dso_type = DSO__TYPE_UNKNOWN, + }; - if (!dso || dso->long_name[0] != '/') - continue; - dso_type = dso__type(dso, machine); - if (dso_type != DSO__TYPE_UNKNOWN) - break; - } + maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args); - return dso_type; + return args.dso_type; } #if BITS_PER_LONG == 64 -- cgit v1.2.3 From 9cce3a161e17b5c1d50de75e85c1aeb7452d17e6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:47 -0800 Subject: perf maps: Reduce scope of maps__for_each_entry() Reduce scope of maps__for_each_entry() as maps__for_each_map() is a safer alternative holding the maps lock during iteration. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 3 +++ tools/perf/util/maps.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 160a6dce54bb..00e6589bba10 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -10,6 +10,9 @@ #include "ui/ui.h" #include "unwind.h" +#define maps__for_each_entry(maps, map) \ + for (map = maps__first(maps); map; map = map_rb_node__next(map)) + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 14ad95979257..8ac30cdaf5bd 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -36,9 +36,6 @@ struct map_rb_node *map_rb_node__next(struct map_rb_node *node); struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map_rb_node__next(map)) - #define maps__for_each_entry_safe(maps, map, next) \ for (map = maps__first(maps), next = map_rb_node__next(map); map; \ map = next, next = map_rb_node__next(map)) -- cgit v1.2.3 From 8d5847a61723b4dbb3da3b356925c4928ed14de2 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:48 -0800 Subject: perf maps: Add remove maps function to remove a map based on callback Removing maps wasn't being done under the write lock. Similar to maps__for_each_map(), iterate the entries but in this case remove the entry based on the result of the callback. If an entry was removed then maps_by_name() also needs updating, so add missed flush. In dso__load_kcore(), the test of map to save would always be false with REFCNT_CHECKING because of a missing RC_CHK_ACCESS/RC_CHK_EQUAL. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 24 ++++++++++++++++++++++++ tools/perf/util/maps.h | 6 ++---- tools/perf/util/symbol.c | 24 ++++++++++++------------ 3 files changed, 38 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 00e6589bba10..f13fd3a9686b 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -13,6 +13,10 @@ #define maps__for_each_entry(maps, map) \ for (map = maps__first(maps); map; map = map_rb_node__next(map)) +#define maps__for_each_entry_safe(maps, map, next) \ + for (map = maps__first(maps), next = map_rb_node__next(map); map; \ + map = next, next = map_rb_node__next(map)) + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); @@ -214,6 +218,26 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) +{ + struct map_rb_node *pos, *next; + unsigned int start_nr_maps; + + down_write(maps__lock(maps)); + + start_nr_maps = maps__nr_maps(maps); + maps__for_each_entry_safe(maps, pos, next) { + if (cb(pos->map, data)) { + __maps__remove(maps, pos); + --RC_CHK_ACCESS(maps)->nr_maps; + } + } + if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps)) + __maps__free_maps_by_name(maps); + + up_write(maps__lock(maps)); +} + struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) { struct map *map = maps__find(maps, addr); diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 8ac30cdaf5bd..b94ad5c8fea7 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -36,10 +36,6 @@ struct map_rb_node *map_rb_node__next(struct map_rb_node *node); struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ - map = next, next = map_rb_node__next(map)) - DECLARE_RC_STRUCT(maps) { struct rb_root entries; struct rw_semaphore lock; @@ -80,6 +76,8 @@ static inline void __maps__zput(struct maps **map) /* Iterate over map calling cb for each entry. */ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); +/* Iterate over map removing an entry if cb returns true. */ +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); static inline struct rb_root *maps__entries(struct maps *maps) { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 72f03b875478..be212ba157dc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1239,13 +1239,23 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } +static bool remove_old_maps(struct map *map, void *data) +{ + const struct map *map_to_save = data; + + /* + * We need to preserve eBPF maps even if they are covered by kcore, + * because we need to access eBPF dso for source data. + */ + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); +} + static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; struct map *replacement_map = NULL; - struct map_rb_node *old_node, *next; struct machine *machine; bool is_64_bit; int err, fd; @@ -1292,17 +1302,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - maps__for_each_entry_safe(kmaps, old_node, next) { - struct map *old_map = old_node->map; - - /* - * We need to preserve eBPF maps even if they are - * covered by kcore, because we need to access - * eBPF dso for source data. - */ - if (old_map != map && !__map__is_bpf_prog(old_map)) - maps__remove(kmaps, old_map); - } + maps__remove_maps(kmaps, remove_old_maps, map); machine->trampolines_mapped = false; /* Find the kernel map using the '_stext' symbol */ -- cgit v1.2.3 From ec49230cf6dda70437bf878281566dd82af9affa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:49 -0800 Subject: perf debug: Expose debug file Some dumping call backs need to be passed a FILE*. Expose debug file via an accessor API for a consistent way to do this. Catch the unlikely failure of it not being set. Switch two cases where stderr was being used instead of debug_file. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-16-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 22 +++++++++++++++------- tools/perf/util/debug.h | 1 + 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 88378c4c5dd9..e282b4ceb4d2 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -38,12 +38,21 @@ bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static FILE *debug_file; +static FILE *_debug_file; bool debug_display_time; +FILE *debug_file(void) +{ + if (!_debug_file) { + pr_warning_once("debug_file not set"); + debug_set_file(stderr); + } + return _debug_file; +} + void debug_set_file(FILE *file) { - debug_file = file; + _debug_file = file; } void debug_set_display_time(bool set) @@ -78,8 +87,8 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); } else { - ret = fprintf_time(debug_file); - ret += vfprintf(debug_file, fmt, args); + ret = fprintf_time(debug_file()); + ret += vfprintf(debug_file(), fmt, args); } } @@ -107,9 +116,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) nsecs -= secs * NSEC_PER_SEC; usecs = nsecs / NSEC_PER_USEC; - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", - secs, usecs); - ret += vfprintf(stderr, fmt, args); + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); + ret += vfprintf(debug_file(), fmt, args); return ret; } diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f99468a7f681..de8870980d44 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -77,6 +77,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +FILE *debug_file(void); void debug_set_file(FILE *file); void debug_set_display_time(bool set); void perf_debug_setup(void); -- cgit v1.2.3 From 07ef14d50cf1af1caa49cd183216a517e75e8a93 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:50 -0800 Subject: perf maps: Refactor maps__fixup_overlappings() Rename to maps__fixup_overlap_and_insert() as the given mapping is always inserted. Factor out first_ending_after() as a utility function. Minor variable name changes. Switch to using debug_file() rather than passing a debug FILE*. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-17-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 57 +++++++++++++++++++++++++++++------------------- tools/perf/util/maps.h | 2 +- tools/perf/util/thread.c | 3 +-- 3 files changed, 37 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index f13fd3a9686b..94a97923527d 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -334,20 +334,16 @@ size_t maps__fprintf(struct maps *maps, FILE *fp) return args.printed; } -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) +/* + * Find first map where end > map->start. + * Same as find_vma() in kernel. + */ +static struct rb_node *first_ending_after(struct maps *maps, const struct map *map) { struct rb_root *root; struct rb_node *next, *first; - int err = 0; - - down_write(maps__lock(maps)); root = maps__entries(maps); - - /* - * Find first map where end > map->start. - * Same as find_vma() in kernel. - */ next = root->rb_node; first = NULL; while (next) { @@ -361,8 +357,23 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) } else next = next->rb_right; } + return first; +} + +/* + * Adds new to maps, if new overlaps existing entries then the existing maps are + * adjusted or removed so that new fits without overlapping any entries. + */ +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) +{ + + struct rb_node *next; + int err = 0; + FILE *fp = debug_file(); + + down_write(maps__lock(maps)); - next = first; + next = first_ending_after(maps, new); while (next && !err) { struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); next = rb_next(&pos->rb_node); @@ -371,27 +382,27 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) * Stop if current map starts after map->end. * Maps are ordered by start: next will not overlap for sure. */ - if (map__start(pos->map) >= map__end(map)) + if (map__start(pos->map) >= map__end(new)) break; if (verbose >= 2) { if (use_browser) { pr_debug("overlapping maps in %s (disable tui for more info)\n", - map__dso(map)->name); + map__dso(new)->name); } else { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); + pr_debug("overlapping maps:\n"); + map__fprintf(new, fp); map__fprintf(pos->map, fp); } } - rb_erase_init(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, maps__entries(maps)); /* * Now check if we need to create new maps for areas not * overlapped by the new map: */ - if (map__start(map) > map__start(pos->map)) { + if (map__start(new) > map__start(pos->map)) { struct map *before = map__clone(pos->map); if (before == NULL) { @@ -399,7 +410,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_end(before, map__start(map)); + map__set_end(before, map__start(new)); err = __maps__insert(maps, before); if (err) { map__put(before); @@ -411,7 +422,7 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) map__put(before); } - if (map__end(map) < map__end(pos->map)) { + if (map__end(new) < map__end(pos->map)) { struct map *after = map__clone(pos->map); if (after == NULL) { @@ -419,10 +430,10 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) goto put_map; } - map__set_start(after, map__end(map)); - map__add_pgoff(after, map__end(map) - map__start(pos->map)); - assert(map__map_ip(pos->map, map__end(map)) == - map__map_ip(after, map__end(map))); + map__set_start(after, map__end(new)); + map__add_pgoff(after, map__end(new) - map__start(pos->map)); + assert(map__map_ip(pos->map, map__end(new)) == + map__map_ip(after, map__end(new))); err = __maps__insert(maps, after); if (err) { map__put(after); @@ -436,6 +447,8 @@ put_map: map__put(pos->map); free(pos); } + /* Add the map. */ + err = __maps__insert(maps, new); up_write(maps__lock(maps)); return err; } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index b94ad5c8fea7..62e94d443c02 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -133,7 +133,7 @@ struct addr_map_symbol; int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index b6986a81aa6d..3d47b5c5528b 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -345,8 +345,7 @@ int thread__insert_map(struct thread *thread, struct map *map) if (ret) return ret; - maps__fixup_overlappings(thread__maps(thread), map, stderr); - return maps__insert(thread__maps(thread), map); + return maps__fixup_overlap_and_insert(thread__maps(thread), map); } struct thread__prepare_access_maps_cb_args { -- cgit v1.2.3 From 980d7927213a57c9a31ff4ea685eb93fbe4b31ab Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:51 -0800 Subject: perf maps: Do simple merge if given map doesn't overlap Simplify merge in for the simple case of a non-overlapping map. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-18-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 94a97923527d..f305a4834cf0 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -697,9 +697,20 @@ void maps__fixup_end(struct maps *maps) int maps__merge_in(struct maps *kmaps, struct map *new_map) { struct map_rb_node *rb_node; + struct rb_node *first; + bool overlaps; LIST_HEAD(merged); int err = 0; + down_read(maps__lock(kmaps)); + first = first_ending_after(kmaps, new_map); + rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL; + overlaps = rb_node && map__start(rb_node->map) < map__end(new_map); + up_read(maps__lock(kmaps)); + + if (!overlaps) + return maps__insert(kmaps, new_map); + maps__for_each_entry(kmaps, rb_node) { struct map *old_map = rb_node->map; -- cgit v1.2.3 From 9084952704ba075de28684301ec282b6626b5e7a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:52 -0800 Subject: perf maps: Rename clone to copy from Rename maps__clone() to maps__copy_from() to be more intention revealing of its behavior. Pass the underlying maps rather than the thread. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-19-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- tools/perf/util/maps.c | 6 +----- tools/perf/util/maps.h | 3 +-- tools/perf/util/thread.c | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ca855fc435ac..38bf7108821d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -453,7 +453,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. */ - err = maps__clone(thread, thread__maps(host_thread)); + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index f305a4834cf0..986daa1b0497 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -453,12 +453,8 @@ put_map: return err; } -/* - * XXX This should not really _copy_ te maps, but refcount them. - */ -int maps__clone(struct thread *thread, struct maps *parent) +int maps__copy_from(struct maps *maps, struct maps *parent) { - struct maps *maps = thread__maps(thread); int err; struct map_rb_node *rb_node; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 62e94d443c02..e4a49d6ff5cf 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -14,7 +14,6 @@ struct ref_reloc_sym; struct machine; struct map; struct maps; -struct thread; struct map_rb_node { struct rb_node rb_node; @@ -61,7 +60,7 @@ struct kmap { struct maps *maps__new(struct machine *machine); bool maps__empty(struct maps *maps); -int maps__clone(struct thread *thread, struct maps *parent); +int maps__copy_from(struct maps *maps, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3d47b5c5528b..89c47a5098e2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -390,7 +390,7 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; + return do_maps_clone ? maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) -- cgit v1.2.3 From e77b0236cd0cd1572c6a9b25097b207eab799e74 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:53 -0800 Subject: perf maps: Add maps__load_first() Avoid bpf_lock_contention_read touching the internal maps data structure by adding a helper function. As access is done directly on the map in maps, hold the read lock to stop it being removed. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-20-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 2 +- tools/perf/util/maps.c | 13 +++++++++++++ tools/perf/util/maps.h | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index f1716c089c99..31ff19afc20c 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -318,7 +318,7 @@ int lock_contention_read(struct lock_contention *con) } /* make sure it loads the kernel map */ - map__load(maps__first(machine->kmaps)->map); + maps__load_first(machine->kmaps); prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 986daa1b0497..024a6c9f72c4 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -793,3 +793,16 @@ out: } return err; } + +void maps__load_first(struct maps *maps) +{ + struct map_rb_node *first; + + down_read(maps__lock(maps)); + + first = maps__first(maps); + if (first) + map__load(first->map); + + up_read(maps__lock(maps)); +} diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index e4a49d6ff5cf..b7ab3ec61b7c 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -142,4 +142,6 @@ void __maps__sort_by_name(struct maps *maps); void maps__fixup_end(struct maps *maps); +void maps__load_first(struct maps *maps); + #endif // __PERF_MAPS_H -- cgit v1.2.3 From 75858007d101cf38e9a79d682d0361bb6493d7cc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:54 -0800 Subject: perf maps: Add find next entry to give entry after the given map Use to remove map_rb_node use from machine.c. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-21-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 7 +++---- tools/perf/util/maps.c | 11 +++++++++++ tools/perf/util/maps.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 38bf7108821d..b397a769006f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1762,12 +1762,11 @@ int machine__create_kernel_maps(struct machine *machine) if (end == ~0ULL) { /* update end address of the kernel map using adjacent module address */ - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), - machine__kernel_map(machine)); - struct map_rb_node *next = map_rb_node__next(rb_node); + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), + machine__kernel_map(machine)); if (next) - machine__set_kernel_mmap(machine, start, map__start(next->map)); + machine__set_kernel_mmap(machine, start, map__start(next)); } out_put: diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 024a6c9f72c4..5b898a0e97b2 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -663,6 +663,17 @@ out_unlock: return map; } +struct map *maps__find_next_entry(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node = maps__find_node(maps, map); + struct map_rb_node *next = map_rb_node__next(rb_node); + + if (next) + return next->map; + + return NULL; +} + void maps__fixup_end(struct maps *maps) { struct map_rb_node *prev = NULL, *curr; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index b7ab3ec61b7c..84b42c8456e8 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -136,6 +136,8 @@ int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); +struct map *maps__find_next_entry(struct maps *maps, struct map *map); + int maps__merge_in(struct maps *kmaps, struct map *new_map); void __maps__sort_by_name(struct maps *maps); -- cgit v1.2.3 From 631bb236aa6f306fd2ba16aee9ae96083453eebc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:55 -0800 Subject: perf maps: Reduce scope of map_rb_node and maps internals Avoid exposing the implementation of maps so that the internals can be refactored. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-22-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 90 ++++++++++++++++++++++++++++++-------------------- tools/perf/util/maps.h | 23 ------------- 2 files changed, 55 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 5b898a0e97b2..dcd67384d877 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -10,6 +10,11 @@ #include "ui/ui.h" #include "unwind.h" +struct map_rb_node { + struct rb_node rb_node; + struct map *map; +}; + #define maps__for_each_entry(maps, map) \ for (map = maps__first(maps); map; map = map_rb_node__next(map)) @@ -17,6 +22,56 @@ for (map = maps__first(maps), next = map_rb_node__next(map); map; \ map = next, next = map_rb_node__next(map)) +static struct rb_root *maps__entries(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->entries; +} + +static struct rw_semaphore *maps__lock(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->lock; +} + +static struct map **maps__maps_by_name(struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_name; +} + +static struct map_rb_node *maps__first(struct maps *maps) +{ + struct rb_node *first = rb_first(maps__entries(maps)); + + if (first) + return rb_entry(first, struct map_rb_node, rb_node); + return NULL; +} + +static struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +{ + struct rb_node *next; + + if (!node) + return NULL; + + next = rb_next(&node->rb_node); + + if (!next) + return NULL; + + return rb_entry(next, struct map_rb_node, rb_node); +} + +static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) +{ + struct map_rb_node *rb_node; + + maps__for_each_entry(maps, rb_node) { + if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) + return rb_node; + } + return NULL; +} + static void maps__init(struct maps *maps, struct machine *machine) { refcount_set(maps__refcnt(maps), 1); @@ -485,17 +540,6 @@ out_unlock: return err; } -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) -{ - struct map_rb_node *rb_node; - - maps__for_each_entry(maps, rb_node) { - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) - return rb_node; - } - return NULL; -} - struct map *maps__find(struct maps *maps, u64 ip) { struct rb_node *p; @@ -521,30 +565,6 @@ out: return m ? m->map : NULL; } -struct map_rb_node *maps__first(struct maps *maps) -{ - struct rb_node *first = rb_first(maps__entries(maps)); - - if (first) - return rb_entry(first, struct map_rb_node, rb_node); - return NULL; -} - -struct map_rb_node *map_rb_node__next(struct map_rb_node *node) -{ - struct rb_node *next; - - if (!node) - return NULL; - - next = rb_next(&node->rb_node); - - if (!next) - return NULL; - - return rb_entry(next, struct map_rb_node, rb_node); -} - static int map__strcmp(const void *a, const void *b) { const struct map *map_a = *(const struct map **)a; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 84b42c8456e8..d836d04c9402 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -15,11 +15,6 @@ struct machine; struct map; struct maps; -struct map_rb_node { - struct rb_node rb_node; - struct map *map; -}; - struct map_list_node { struct list_head node; struct map *map; @@ -30,9 +25,6 @@ static inline struct map_list_node *map_list_node__new(void) return malloc(sizeof(struct map_list_node)); } -struct map_rb_node *maps__first(struct maps *maps); -struct map_rb_node *map_rb_node__next(struct map_rb_node *node); -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); DECLARE_RC_STRUCT(maps) { @@ -78,26 +70,11 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) /* Iterate over map removing an entry if cb returns true. */ void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); -static inline struct rb_root *maps__entries(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->entries; -} - static inline struct machine *maps__machine(struct maps *maps) { return RC_CHK_ACCESS(maps)->machine; } -static inline struct rw_semaphore *maps__lock(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->lock; -} - -static inline struct map **maps__maps_by_name(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->maps_by_name; -} - static inline unsigned int maps__nr_maps(const struct maps *maps) { return RC_CHK_ACCESS(maps)->nr_maps; -- cgit v1.2.3 From 7887097c65446ff229099221975f6fc9ad9e378b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:16:56 -0800 Subject: perf maps: Fix up overlaps during fixup_end Maps are sometimes made overlapping, in particular kernel maps. If the end of a map overlaps the start of the next, shorten the overlapping map. This should remove potential non-determinism in maps__find, ie finding maps by address. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Changbin Du Cc: Colin Ian King Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: German Gomez Cc: Guilherme Amadio Cc: Huacai Chen Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Kajol Jain Cc: Kan Liang Cc: Leo Yan Cc: Li Dong Cc: Liam Howlett Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Ming Wang Cc: Namhyung Kim Cc: Nick Terrell Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Vincent Whitchurch Cc: Wenyu Liu Cc: Yang Jihong Link: https://lore.kernel.org/r/20231207011722.1220634-23-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/maps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index dcd67384d877..0334fc18d9c6 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -701,7 +701,7 @@ void maps__fixup_end(struct maps *maps) down_write(maps__lock(maps)); maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) + if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map))) map__set_end(prev->map, map__start(curr->map)); prev = curr; -- cgit v1.2.3 From 69ff403d87be4812571c54b1159e24998414bcab Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 16 Dec 2023 21:10:52 +0800 Subject: selftests/bpf: Remove tests for zeroed-array kptr bpf_mem_alloc() doesn't support zero-sized allocation, so removing these tests from test_bpf_ma test. After the removal, there will no definition for bin_data_8, so remove 8 from data_sizes array and adjust the index of data_btf_ids array in all test cases accordingly. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231216131052.27621-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_bpf_ma.c | 100 ++++++++++++------------ 1 file changed, 49 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b685a4aba6bd..069db9085e78 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -17,7 +17,7 @@ struct generic_map_value { char _license[] SEC("license") = "GPL"; -const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; int err = 0; @@ -166,7 +166,7 @@ static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ } while (0) -DEFINE_ARRAY_WITH_KPTR(8); +/* kptr doesn't support bin_data_8 which is a zero-sized array */ DEFINE_ARRAY_WITH_KPTR(16); DEFINE_ARRAY_WITH_KPTR(32); DEFINE_ARRAY_WITH_KPTR(64); @@ -198,21 +198,20 @@ int test_batch_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, - * then free 128 8-bytes objects in batch to trigger freeing. + /* Alloc 128 16-bytes objects in batch to trigger refilling, + * then free 128 16-bytes objects in batch to trigger freeing. */ - CALL_BATCH_ALLOC_FREE(8, 128, 0); - CALL_BATCH_ALLOC_FREE(16, 128, 1); - CALL_BATCH_ALLOC_FREE(32, 128, 2); - CALL_BATCH_ALLOC_FREE(64, 128, 3); - CALL_BATCH_ALLOC_FREE(96, 128, 4); - CALL_BATCH_ALLOC_FREE(128, 128, 5); - CALL_BATCH_ALLOC_FREE(192, 128, 6); - CALL_BATCH_ALLOC_FREE(256, 128, 7); - CALL_BATCH_ALLOC_FREE(512, 64, 8); - CALL_BATCH_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_ALLOC_FREE(16, 128, 0); + CALL_BATCH_ALLOC_FREE(32, 128, 1); + CALL_BATCH_ALLOC_FREE(64, 128, 2); + CALL_BATCH_ALLOC_FREE(96, 128, 3); + CALL_BATCH_ALLOC_FREE(128, 128, 4); + CALL_BATCH_ALLOC_FREE(192, 128, 5); + CALL_BATCH_ALLOC_FREE(256, 128, 6); + CALL_BATCH_ALLOC_FREE(512, 64, 7); + CALL_BATCH_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_ALLOC_FREE(4096, 8, 10); return 0; } @@ -223,21 +222,20 @@ int test_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, + /* Alloc 128 16-bytes objects in batch to trigger refilling, * then free these objects through map free. */ - CALL_BATCH_ALLOC(8, 128, 0); - CALL_BATCH_ALLOC(16, 128, 1); - CALL_BATCH_ALLOC(32, 128, 2); - CALL_BATCH_ALLOC(64, 128, 3); - CALL_BATCH_ALLOC(96, 128, 4); - CALL_BATCH_ALLOC(128, 128, 5); - CALL_BATCH_ALLOC(192, 128, 6); - CALL_BATCH_ALLOC(256, 128, 7); - CALL_BATCH_ALLOC(512, 64, 8); - CALL_BATCH_ALLOC(1024, 32, 9); - CALL_BATCH_ALLOC(2048, 16, 10); - CALL_BATCH_ALLOC(4096, 8, 11); + CALL_BATCH_ALLOC(16, 128, 0); + CALL_BATCH_ALLOC(32, 128, 1); + CALL_BATCH_ALLOC(64, 128, 2); + CALL_BATCH_ALLOC(96, 128, 3); + CALL_BATCH_ALLOC(128, 128, 4); + CALL_BATCH_ALLOC(192, 128, 5); + CALL_BATCH_ALLOC(256, 128, 6); + CALL_BATCH_ALLOC(512, 64, 7); + CALL_BATCH_ALLOC(1024, 32, 8); + CALL_BATCH_ALLOC(2048, 16, 9); + CALL_BATCH_ALLOC(4096, 8, 10); return 0; } @@ -251,17 +249,17 @@ int test_batch_percpu_alloc_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free 128 16-bytes per-cpu objects in batch to trigger freeing. */ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); return 0; } @@ -275,17 +273,17 @@ int test_percpu_free_through_map_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. */ - CALL_BATCH_PERCPU_ALLOC(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); return 0; } -- cgit v1.2.3 From 4249f13c11be8b8b7bf93204185e150c3bdc968d Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 13 Dec 2023 12:50:57 -0800 Subject: maple_tree: do not preallocate nodes for slot stores mas_preallocate() defaults to requesting 1 node for preallocation and then ,depending on the type of store, will update the request variable. There isn't a check for a slot store type, so slot stores are preallocating the default 1 node. Slot stores do not require any additional nodes, so add a check for the slot store case that will bypass node_count_gfp(). Update the tests to reflect that slot stores do not require allocations. User visible effects of this bug include increased memory usage from the unneeded node that was allocated. Link: https://lkml.kernel.org/r/20231213205058.386589-1-sidhartha.kumar@oracle.com Fixes: 0b8bb544b1a7 ("maple_tree: update mas_preallocate() testing") Signed-off-by: Sidhartha Kumar Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Peng Zhang Cc: [6.6+] Signed-off-by: Andrew Morton --- lib/maple_tree.c | 11 +++++++++++ tools/testing/radix-tree/maple.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index bb24d84a4922..684689457d77 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -5501,6 +5501,17 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) mas_wr_end_piv(&wr_mas); node_size = mas_wr_new_end(&wr_mas); + + /* Slot store, does not require additional nodes */ + if (node_size == wr_mas.node_end) { + /* reuse node */ + if (!mt_in_rcu(mas->tree)) + return 0; + /* shifting boundary */ + if (wr_mas.offset_end - mas->offset == 1) + return 0; + } + if (node_size >= mt_slots[wr_mas.type]) { /* Split, worst case for now. */ request = 1 + mas_mt_height(mas) * 2; diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index e5da1cad70ba..76a8990bb14e 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -35538,7 +35538,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt) MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated != 1); + MT_BUG_ON(mt, allocated != 0); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); -- cgit v1.2.3 From 0aac13add26d546ac74c89d2883b3a5f0fbea039 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Thu, 14 Dec 2023 15:19:30 +0500 Subject: selftests: secretmem: floor the memory size to the multiple of page_size The "locked-in-memory size" limit per process can be non-multiple of page_size. The mmap() fails if we try to allocate locked-in-memory with same size as the allowed limit if it isn't multiple of the page_size because mmap() rounds off the memory size to be allocated to next multiple of page_size. Fix this by flooring the length to be allocated with mmap() to the previous multiple of the page_size. This was getting triggered on KernelCI regularly because of different ulimit settings which wasn't multiple of the page_size. Find logs here: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/ The bug in was present from the time test was first added. Link: https://lkml.kernel.org/r/20231214101931.1155586-1-usama.anjum@collabora.com Fixes: 76fe17ef588a ("secretmem: test: add basic selftest for memfd_secret(2)") Signed-off-by: Muhammad Usama Anjum Reported-by: "kernelci.org bot" Closes: https://linux.kernelci.org/test/plan/id/657654bd8e81e654fae13532/ Cc: "James E.J. Bottomley" Cc: Mike Rapoport (IBM) Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/memfd_secret.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 957b9e18c729..9b298f6a04b3 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -62,6 +62,9 @@ static void test_mlock_limit(int fd) char *mem; len = mlock_limit_cur; + if (len % page_size != 0) + len = (len/page_size) * page_size; + mem = mmap(NULL, len, prot, mode, fd, 0); if (mem == MAP_FAILED) { fail("unable to mmap secret memory\n"); -- cgit v1.2.3 From b6aab3384cafba151c53d3b5f7e1f8d073aadf03 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:06 +0000 Subject: selftests/mm/kugepaged: restore thp settings at exit Previously, the saved thp settings would be restored upon a signal or at the natural end of the test suite. But there are some tests that directly call exit() upon failure. In this case, the thp settings were not being restored, which could then influence other tests. Fix this by installing an atexit() handler to do the actual restore. The signal handler can now just call exit() and the atexit handler is invoked. Link: https://lkml.kernel.org/r/20231207161211.2374093-6-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Alistair Popple Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 030667cb5533..fc47a1c4944c 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -374,18 +374,22 @@ static void pop_settings(void) write_settings(current_settings()); } -static void restore_settings(int sig) +static void restore_settings_atexit(void) { if (skip_settings_restore) - goto out; + return; printf("Restore THP and khugepaged settings..."); write_settings(&saved_settings); success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); + + skip_settings_restore = true; +} + +static void restore_settings(int sig) +{ + /* exit() will invoke the restore_settings_atexit handler. */ + exit(sig ? EXIT_FAILURE : exit_status); } static void save_settings(void) @@ -415,6 +419,7 @@ static void save_settings(void) success("OK"); + atexit(restore_settings_atexit); signal(SIGTERM, restore_settings); signal(SIGINT, restore_settings); signal(SIGHUP, restore_settings); -- cgit v1.2.3 From 00679a183ac6d2584723cfc2a2c07c8285f802dc Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:07 +0000 Subject: selftests/mm: factor out thp settings management The khugepaged test has a useful framework for save/restore/pop/push of all thp settings via the sysfs interface. This will be useful to explicitly control multi-size THP settings in other tests, so let's move it out of khugepaged and into its own thp_settings.[c|h] utility. Link: https://lkml.kernel.org/r/20231207161211.2374093-7-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Alistair Popple Acked-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/Makefile | 4 +- tools/testing/selftests/mm/khugepaged.c | 346 ++---------------------------- tools/testing/selftests/mm/thp_settings.c | 296 +++++++++++++++++++++++++ tools/testing/selftests/mm/thp_settings.h | 71 ++++++ 4 files changed, 391 insertions(+), 326 deletions(-) create mode 100644 tools/testing/selftests/mm/thp_settings.c create mode 100644 tools/testing/selftests/mm/thp_settings.h (limited to 'tools') diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index dede0bcf97a3..2453add65d12 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -117,8 +117,8 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk -$(TEST_GEN_PROGS): vm_util.c -$(TEST_GEN_FILES): vm_util.c +$(TEST_GEN_PROGS): vm_util.c thp_settings.c +$(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index fc47a1c4944c..b15e7fd70176 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -22,13 +22,13 @@ #include "linux/magic.h" #include "vm_util.h" +#include "thp_settings.h" #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -71,78 +71,7 @@ struct file_info { }; static struct file_info finfo; - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool use_zero_page; - struct khugepaged_settings khugepaged; - unsigned long read_ahead_kb; -}; - -static struct settings saved_settings; static bool skip_settings_restore; - static int exit_status; static void success(const char *msg) @@ -161,226 +90,13 @@ static void skip(const char *msg) printf(" \e[33m%s\e[0m\n", msg); } -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) { - printf("open(%s)\n", path); - exit(EXIT_FAILURE); - return 0; - } - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) { - printf("write(%s)\n", buf); - exit(EXIT_FAILURE); - return 0; - } - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) -{ - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long _read_num(const char *path) -{ - char buf[21]; - - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return _read_num(path); -} - -static void _write_num(const char *path, unsigned long num) -{ - char buf[21]; - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - _write_num(path, num); -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); - - if (file_ops && finfo.type == VMA_FILE) - _write_num(finfo.dev_queue_read_ahead_path, - settings->read_ahead_kb); -} - -#define MAX_SETTINGS_DEPTH 4 -static struct settings settings_stack[MAX_SETTINGS_DEPTH]; -static int settings_index; - -static struct settings *current_settings(void) -{ - if (!settings_index) { - printf("Fail: No settings set"); - exit(EXIT_FAILURE); - } - return settings_stack + settings_index - 1; -} - -static void push_settings(struct settings *settings) -{ - if (settings_index >= MAX_SETTINGS_DEPTH) { - printf("Fail: Settings stack exceeded"); - exit(EXIT_FAILURE); - } - settings_stack[settings_index++] = *settings; - write_settings(current_settings()); -} - -static void pop_settings(void) -{ - if (settings_index <= 0) { - printf("Fail: Settings stack empty"); - exit(EXIT_FAILURE); - } - --settings_index; - write_settings(current_settings()); -} - static void restore_settings_atexit(void) { if (skip_settings_restore) return; printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); + thp_restore_settings(); success("OK"); skip_settings_restore = true; @@ -395,27 +111,9 @@ static void restore_settings(int sig) static void save_settings(void) { printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; if (file_ops && finfo.type == VMA_FILE) - saved_settings.read_ahead_kb = - _read_num(finfo.dev_queue_read_ahead_path); + thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path); + thp_save_settings(); success("OK"); @@ -798,7 +496,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect) { int ret; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); printf("%s...", msg); @@ -808,7 +506,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, */ settings.thp_enabled = THP_NEVER; settings.shmem_enabled = SHMEM_NEVER; - push_settings(&settings); + thp_push_settings(&settings); /* Clear VM_NOHUGEPAGE */ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); @@ -820,7 +518,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, else success("OK"); - pop_settings(); + thp_pop_settings(); } static void madvise_collapse(const char *msg, char *p, int nr_hpages, @@ -850,13 +548,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; + full_scans = thp_read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { if (ops->check_huge(p, nr_hpages)) break; - if (read_num("khugepaged/full_scans") >= full_scans) + if (thp_read_num("khugepaged/full_scans") >= full_scans) break; printf("."); usleep(TICK); @@ -911,11 +609,11 @@ static bool is_tmpfs(struct mem_ops *ops) static void alloc_at_fault(void) { - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - push_settings(&settings); + thp_push_settings(&settings); p = alloc_mapping(1); *p = 1; @@ -925,7 +623,7 @@ static void alloc_at_fault(void) else fail("Fail"); - pop_settings(); + thp_pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); @@ -973,11 +671,11 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); void *p; settings.khugepaged.max_ptes_none = max_ptes_none; - push_settings(&settings); + thp_push_settings(&settings); p = ops->setup_area(1); @@ -1002,7 +700,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o } skip: ops->cleanup_area(p, hpage_pmd_size); - pop_settings(); + thp_pop_settings(); } static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) @@ -1033,7 +731,7 @@ out: static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); + int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"); void *p; p = ops->setup_area(1); @@ -1250,11 +948,11 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o fail("Fail"); ops->fault(p, 0, page_size); - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); + thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", p, 1, ops, true); - write_num("khugepaged/max_ptes_shared", - current_settings()->khugepaged.max_ptes_shared); + thp_write_num("khugepaged/max_ptes_shared", + thp_current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); ops->cleanup_area(p, hpage_pmd_size); @@ -1275,7 +973,7 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); + int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; @@ -1443,7 +1141,7 @@ static void parse_test_type(int argc, const char **argv) int main(int argc, const char **argv) { - struct settings default_settings = { + struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, .shmem_enabled = SHMEM_ADVISE, @@ -1484,7 +1182,7 @@ int main(int argc, const char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; save_settings(); - push_settings(&default_settings); + thp_push_settings(&default_settings); alloc_at_fault(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c new file mode 100644 index 000000000000..5e8ec792cac7 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +#include "thp_settings.h" + +#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" +#define MAX_SETTINGS_DEPTH 4 +static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; +static struct thp_settings saved_settings; +static char dev_queue_read_ahead_path[PATH_MAX]; + +static const char * const thp_enabled_strings[] = { + "always", + "madvise", + "never", + NULL +}; + +static const char * const thp_defrag_strings[] = { + "always", + "defer", + "defer+madvise", + "madvise", + "never", + NULL +}; + +static const char * const shmem_enabled_strings[] = { + "always", + "within_size", + "advise", + "never", + "deny", + "force", + NULL +}; + +int read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); + return 0; + } + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); + return 0; + } + + return (unsigned int) numwritten; +} + +const unsigned long read_num(const char *path) +{ + char buf[21]; + + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file()"); + exit(EXIT_FAILURE); + } + + return strtoul(buf, NULL, 10); +} + +void write_num(const char *path, unsigned long num) +{ + char buf[21]; + + sprintf(buf, "%ld", num); + if (!write_file(path, buf, strlen(buf) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +int thp_read_string(const char *name, const char * const strings[]) +{ + char path[PATH_MAX]; + char buf[256]; + char *c; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!read_file(path, buf, sizeof(buf))) { + perror(path); + exit(EXIT_FAILURE); + } + + c = strchr(buf, '['); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + + c++; + memmove(buf, c, sizeof(buf) - (c - buf)); + + c = strchr(buf, ']'); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + *c = '\0'; + + ret = 0; + while (strings[ret]) { + if (!strcmp(strings[ret], buf)) + return ret; + ret++; + } + + printf("Failed to parse %s\n", name); + exit(EXIT_FAILURE); +} + +void thp_write_string(const char *name, const char *val) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(path, val, strlen(val) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +const unsigned long thp_read_num(const char *name) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return read_num(path); +} + +void thp_write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + write_num(path, num); +} + +void thp_read_settings(struct thp_settings *settings) +{ + *settings = (struct thp_settings) { + .thp_enabled = thp_read_string("enabled", thp_enabled_strings), + .thp_defrag = thp_read_string("defrag", thp_defrag_strings), + .shmem_enabled = + thp_read_string("shmem_enabled", shmem_enabled_strings), + .use_zero_page = thp_read_num("use_zero_page"), + }; + settings->khugepaged = (struct khugepaged_settings) { + .defrag = thp_read_num("khugepaged/defrag"), + .alloc_sleep_millisecs = + thp_read_num("khugepaged/alloc_sleep_millisecs"), + .scan_sleep_millisecs = + thp_read_num("khugepaged/scan_sleep_millisecs"), + .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"), + .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"), + .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"), + .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"), + }; + if (dev_queue_read_ahead_path[0]) + settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); +} + +void thp_write_settings(struct thp_settings *settings) +{ + struct khugepaged_settings *khugepaged = &settings->khugepaged; + + thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); + thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); + thp_write_string("shmem_enabled", + shmem_enabled_strings[settings->shmem_enabled]); + thp_write_num("use_zero_page", settings->use_zero_page); + + thp_write_num("khugepaged/defrag", khugepaged->defrag); + thp_write_num("khugepaged/alloc_sleep_millisecs", + khugepaged->alloc_sleep_millisecs); + thp_write_num("khugepaged/scan_sleep_millisecs", + khugepaged->scan_sleep_millisecs); + thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); + thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); + thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); + thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (dev_queue_read_ahead_path[0]) + write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); +} + +struct thp_settings *thp_current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +void thp_push_settings(struct thp_settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + thp_write_settings(thp_current_settings()); +} + +void thp_pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + thp_write_settings(thp_current_settings()); +} + +void thp_restore_settings(void) +{ + thp_write_settings(&saved_settings); +} + +void thp_save_settings(void) +{ + thp_read_settings(&saved_settings); +} + +void thp_set_read_ahead_path(char *path) +{ + if (!path) { + dev_queue_read_ahead_path[0] = '\0'; + return; + } + + strncpy(dev_queue_read_ahead_path, path, + sizeof(dev_queue_read_ahead_path)); + dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h new file mode 100644 index 000000000000..ff3d98c30617 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __THP_SETTINGS_H__ +#define __THP_SETTINGS_H__ + +#include +#include +#include + +enum thp_enabled { + THP_ALWAYS, + THP_MADVISE, + THP_NEVER, +}; + +enum thp_defrag { + THP_DEFRAG_ALWAYS, + THP_DEFRAG_DEFER, + THP_DEFRAG_DEFER_MADVISE, + THP_DEFRAG_MADVISE, + THP_DEFRAG_NEVER, +}; + +enum shmem_enabled { + SHMEM_ALWAYS, + SHMEM_WITHIN_SIZE, + SHMEM_ADVISE, + SHMEM_NEVER, + SHMEM_DENY, + SHMEM_FORCE, +}; + +struct khugepaged_settings { + bool defrag; + unsigned int alloc_sleep_millisecs; + unsigned int scan_sleep_millisecs; + unsigned int max_ptes_none; + unsigned int max_ptes_swap; + unsigned int max_ptes_shared; + unsigned long pages_to_scan; +}; + +struct thp_settings { + enum thp_enabled thp_enabled; + enum thp_defrag thp_defrag; + enum shmem_enabled shmem_enabled; + bool use_zero_page; + struct khugepaged_settings khugepaged; + unsigned long read_ahead_kb; +}; + +int read_file(const char *path, char *buf, size_t buflen); +int write_file(const char *path, const char *buf, size_t buflen); +const unsigned long read_num(const char *path); +void write_num(const char *path, unsigned long num); + +int thp_read_string(const char *name, const char * const strings[]); +void thp_write_string(const char *name, const char *val); +const unsigned long thp_read_num(const char *name); +void thp_write_num(const char *name, unsigned long num); + +void thp_write_settings(struct thp_settings *settings); +void thp_read_settings(struct thp_settings *settings); +struct thp_settings *thp_current_settings(void); +void thp_push_settings(struct thp_settings *settings); +void thp_pop_settings(void); +void thp_restore_settings(void); +void thp_save_settings(void); + +void thp_set_read_ahead_path(char *path); + +#endif /* __THP_SETTINGS_H__ */ -- cgit v1.2.3 From 4f5070a5e40db2e9dbf5fff4ec678d6fbb338d5c Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:08 +0000 Subject: selftests/mm: support multi-size THP interface in thp_settings Save and restore the new per-size hugepage enabled setting, if available on the running kernel. Since the number of per-size directories is not fixed, solve this as simply as possible by catering for a maximum number in the thp_settings struct (20). Each array index is the order. The value of THP_NEVER is changed to 0 so that all of these new settings default to THP_NEVER and the user only needs to fill in the ones they want to enable. Link: https://lkml.kernel.org/r/20231207161211.2374093-8-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Hildenbrand Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 3 ++ tools/testing/selftests/mm/thp_settings.c | 55 ++++++++++++++++++++++++++++++- tools/testing/selftests/mm/thp_settings.h | 11 ++++++- 3 files changed, 67 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index b15e7fd70176..7bd3baa9d34b 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1141,6 +1141,7 @@ static void parse_test_type(int argc, const char **argv) int main(int argc, const char **argv) { + int hpage_pmd_order; struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, @@ -1175,11 +1176,13 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } hpage_pmd_nr = hpage_pmd_size / page_size; + hpage_pmd_order = __builtin_ctz(hpage_pmd_nr); default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; + default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index 5e8ec792cac7..a4163438108e 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -16,9 +16,10 @@ static struct thp_settings saved_settings; static char dev_queue_read_ahead_path[PATH_MAX]; static const char * const thp_enabled_strings[] = { + "never", "always", + "inherit", "madvise", - "never", NULL }; @@ -198,6 +199,10 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int i; + *settings = (struct thp_settings) { .thp_enabled = thp_read_string("enabled", thp_enabled_strings), .thp_defrag = thp_read_string("defrag", thp_defrag_strings), @@ -218,11 +223,26 @@ void thp_read_settings(struct thp_settings *settings) }; if (dev_queue_read_ahead_path[0]) settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) { + settings->hugepages[i].enabled = THP_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + settings->hugepages[i].enabled = + thp_read_string(path, thp_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int enabled; + int i; thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); @@ -242,6 +262,15 @@ void thp_write_settings(struct thp_settings *settings) if (dev_queue_read_ahead_path[0]) write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + enabled = settings->hugepages[i].enabled; + thp_write_string(path, thp_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -294,3 +323,27 @@ void thp_set_read_ahead_path(char *path) sizeof(dev_queue_read_ahead_path)); dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } + +unsigned long thp_supported_orders(void) +{ + unsigned long orders = 0; + char path[PATH_MAX]; + char buf[256]; + int ret; + int i; + + for (i = 0; i < NR_ORDERS; i++) { + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + ret = read_file(path, buf, sizeof(buf)); + if (ret) + orders |= 1UL << i; + } + + return orders; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index ff3d98c30617..71cbff05f4c7 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -7,9 +7,10 @@ #include enum thp_enabled { + THP_NEVER, THP_ALWAYS, + THP_INHERIT, THP_MADVISE, - THP_NEVER, }; enum thp_defrag { @@ -29,6 +30,12 @@ enum shmem_enabled { SHMEM_FORCE, }; +#define NR_ORDERS 20 + +struct hugepages_settings { + enum thp_enabled enabled; +}; + struct khugepaged_settings { bool defrag; unsigned int alloc_sleep_millisecs; @@ -46,6 +53,7 @@ struct thp_settings { bool use_zero_page; struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; + struct hugepages_settings hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); @@ -67,5 +75,6 @@ void thp_restore_settings(void); void thp_save_settings(void); void thp_set_read_ahead_path(char *path); +unsigned long thp_supported_orders(void); #endif /* __THP_SETTINGS_H__ */ -- cgit v1.2.3 From 9f0704eae8a4edc8dca9c8a297f798d505a4103a Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:09 +0000 Subject: selftests/mm/khugepaged: enlighten for multi-size THP The `collapse_max_ptes_none` test was previously failing when a THP size less than PMD-size had enabled="always". The root cause is because the test faults in 1 page less than the threshold it set for collapsing. But when THP is enabled always, we "over allocate" and therefore the threshold is passed, and collapse unexpectedly succeeds. Solve this by enlightening khugepaged selftest. Add a command line option to pass in the desired THP size that should be used for all anonymous allocations. The harness will then explicitly configure a THP size as requested and modify the `collapse_max_ptes_none` test so that it faults in the threshold minus the number of pages in the configured THP size. If no command line option is provided, default to order 0, as per previous behaviour. I chose to use an order in the command line interface, since this makes the interface agnostic of base page size, making it easier to invoke from run_vmtests.sh. Link: https://lkml.kernel.org/r/20231207161211.2374093-9-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Hildenbrand Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/khugepaged.c | 48 ++++++++++++++++++++++++------- tools/testing/selftests/mm/run_vmtests.sh | 2 ++ 2 files changed, 39 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 7bd3baa9d34b..829320a519e7 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -28,6 +28,7 @@ static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; +static int anon_order; #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -607,6 +608,11 @@ static bool is_tmpfs(struct mem_ops *ops) return ops == &__file_ops && finfo.type == VMA_SHMEM; } +static bool is_anon(struct mem_ops *ops) +{ + return ops == &__anon_ops; +} + static void alloc_at_fault(void) { struct thp_settings settings = *thp_current_settings(); @@ -673,6 +679,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o int max_ptes_none = hpage_pmd_nr / 2; struct thp_settings settings = *thp_current_settings(); void *p; + int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1; settings.khugepaged.max_ptes_none = max_ptes_none; thp_push_settings(&settings); @@ -686,10 +693,10 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o goto skip; } - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, ops, !c->enforce_pte_scan_limits); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); if (c->enforce_pte_scan_limits) { ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); @@ -1076,7 +1083,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c, static void usage(void) { - fprintf(stderr, "\nUsage: ./khugepaged [dir]\n\n"); + fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] [dir]\n\n"); fprintf(stderr, "\t\t: :\n"); fprintf(stderr, "\t\t: [all|khugepaged|madvise]\n"); fprintf(stderr, "\t\t: [all|anon|file|shmem]\n"); @@ -1085,15 +1092,34 @@ static void usage(void) fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\n\tSupported Options:\n"); + fprintf(stderr, "\t\t-h: This help message.\n"); + fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); exit(1); } -static void parse_test_type(int argc, const char **argv) +static void parse_test_type(int argc, char **argv) { + int opt; char *buf; const char *token; - if (argc == 1) { + while ((opt = getopt(argc, argv, "s:h")) != -1) { + switch (opt) { + case 's': + anon_order = atoi(optarg); + break; + case 'h': + default: + usage(); + } + } + + argv += optind; + argc -= optind; + + if (argc == 0) { /* Backwards compatibility */ khugepaged_context = &__khugepaged_context; madvise_context = &__madvise_context; @@ -1101,7 +1127,7 @@ static void parse_test_type(int argc, const char **argv) return; } - buf = strdup(argv[1]); + buf = strdup(argv[0]); token = strsep(&buf, ":"); if (!strcmp(token, "all")) { @@ -1135,11 +1161,13 @@ static void parse_test_type(int argc, const char **argv) if (!file_ops) return; - if (argc != 3) + if (argc != 2) usage(); + + get_finfo(argv[1]); } -int main(int argc, const char **argv) +int main(int argc, char **argv) { int hpage_pmd_order; struct thp_settings default_settings = { @@ -1164,9 +1192,6 @@ int main(int argc, const char **argv) parse_test_type(argc, argv); - if (file_ops) - get_finfo(argv[2]); - setbuf(stdout, NULL); page_size = getpagesize(); @@ -1183,6 +1208,7 @@ int main(int argc, const char **argv) default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; + default_settings.hugepages[anon_order].enabled = THP_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c0212258b852..87f513f5cf91 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -357,6 +357,8 @@ CATEGORY="cow" run_test ./cow CATEGORY="thp" run_test ./khugepaged +CATEGORY="thp" run_test ./khugepaged -s 2 + CATEGORY="thp" run_test ./transhuge-stress -d 20 CATEGORY="thp" run_test ./split_huge_page_test -- cgit v1.2.3 From 12dc16b38463a671bc91dc2df10f3a014a27ff3b Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:10 +0000 Subject: selftests/mm/cow: generalize do_run_with_thp() helper do_run_with_thp() prepares (PMD-sized) THP memory into different states before running tests. With the introduction of multi-size THP, we would like to reuse this logic to also test those smaller THP sizes. So let's add a thpsize parameter which tells the function what size THP it should operate on. A separate commit will utilize this change to add new tests for multi-size THP, where available. Link: https://lkml.kernel.org/r/20231207161211.2374093-10-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 121 ++++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 6f2f83990441..a284918b1172 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -32,7 +32,7 @@ static size_t pagesize; static int pagemap_fd; -static size_t thpsize; +static size_t pmdsize; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; @@ -734,7 +734,7 @@ enum thp_run { THP_RUN_PARTIAL_SHARED, }; -static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) { char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; size_t size, mmap_size, mremap_size; @@ -759,11 +759,11 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) } /* - * Try to populate a THP. Touch the first sub-page and test if we get - * another sub-page populated automatically. + * Try to populate a THP. Touch the first sub-page and test if + * we get the last sub-page populated automatically. */ mem[0] = 0; - if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } @@ -773,12 +773,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) switch (thp_run) { case THP_RUN_PMD: case THP_RUN_PMD_SWAPOUT: + assert(thpsize == pmdsize); break; case THP_RUN_PTE: case THP_RUN_PTE_SWAPOUT: /* * Trigger PTE-mapping the THP by temporarily mapping a single - * subpage R/O. + * subpage R/O. This is a noop if the THP is not pmdsize (and + * therefore already PTE-mapped). */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { @@ -875,52 +877,60 @@ munmap: munmap(mremap_mem, mremap_size); } -static void run_with_thp(test_fn fn, const char *desc) +static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD); + ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD, size); } -static void run_with_thp_swap(test_fn fn, const char *desc) +static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } -static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE); + ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE, size); } -static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } -static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE); + ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } -static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } -static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } -static void run_with_partial_shared_thp(test_fn fn, const char *desc) +static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); + ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) @@ -1091,15 +1101,15 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (thpsize) { - run_with_thp(test_case->fn, test_case->desc); - run_with_thp_swap(test_case->fn, test_case->desc); - run_with_pte_mapped_thp(test_case->fn, test_case->desc); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); - run_with_single_pte_of_thp(test_case->fn, test_case->desc); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); - run_with_partial_mremap_thp(test_case->fn, test_case->desc); - run_with_partial_shared_thp(test_case->fn, test_case->desc); + if (pmdsize) { + run_with_thp(test_case->fn, test_case->desc, pmdsize); + run_with_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_pte_mapped_thp(test_case->fn, test_case->desc, pmdsize); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, pmdsize); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, pmdsize); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, pmdsize); + run_with_partial_shared_thp(test_case->fn, test_case->desc, pmdsize); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1120,7 +1130,7 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 8; return tests; } @@ -1329,7 +1339,7 @@ static void run_anon_thp_test_cases(void) { int i; - if (!thpsize) + if (!pmdsize) return; ksft_print_msg("[INFO] Anonymous THP tests\n"); @@ -1338,13 +1348,13 @@ static void run_anon_thp_test_cases(void) struct test_case const *test_case = &anon_thp_test_cases[i]; ksft_print_msg("[RUN] %s\n", test_case->desc); - do_run_with_thp(test_case->fn, THP_RUN_PMD); + do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } static int tests_per_anon_thp_test_case(void) { - return thpsize ? 1 : 0; + return pmdsize ? 1 : 0; } typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); @@ -1419,7 +1429,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* For alignment purposes, we need twice the thp size. */ - mmap_size = 2 * thpsize; + mmap_size = 2 * pmdsize; mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { @@ -1434,11 +1444,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* We need a THP-aligned memory area. */ - mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); - smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); - ret = madvise(mem, thpsize, MADV_HUGEPAGE); - ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); if (ret) { ksft_test_result_fail("MADV_HUGEPAGE failed\n"); goto munmap; @@ -1457,7 +1467,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) goto munmap; } - fn(mem, smem, thpsize); + fn(mem, smem, pmdsize); munmap: munmap(mmap_mem, mmap_size); if (mmap_smem != MAP_FAILED) @@ -1650,7 +1660,7 @@ static void run_non_anon_test_case(struct non_anon_test_case const *test_case) run_with_zeropage(test_case->fn, test_case->desc); run_with_memfd(test_case->fn, test_case->desc); run_with_tmpfile(test_case->fn, test_case->desc); - if (thpsize) + if (pmdsize) run_with_huge_zeropage(test_case->fn, test_case->desc); for (i = 0; i < nr_hugetlbsizes; i++) run_with_memfd_hugetlb(test_case->fn, test_case->desc, @@ -1671,7 +1681,7 @@ static int tests_per_non_anon_test_case(void) { int tests = 3 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 1; return tests; } @@ -1683,10 +1693,13 @@ int main(int argc, char **argv) ksft_print_header(); pagesize = getpagesize(); - thpsize = read_pmd_pagesize(); - if (thpsize) + pmdsize = read_pmd_pagesize(); + if (pmdsize) { + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", + pmdsize / 1024); ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - thpsize / 1024); + pmdsize / 1024); + } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); -- cgit v1.2.3 From c0f79103322c322ea9342d52c2d81528b7b56232 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 7 Dec 2023 16:12:11 +0000 Subject: selftests/mm/cow: add tests for anonymous multi-size THP Add tests similar to the existing PMD-sized THP tests, but which operate on memory backed by (PTE-mapped) multi-size THP. This reuses all the existing infrastructure. If the test suite detects that multi-size THP is not supported by the kernel, the new tests are skipped. Link: https://lkml.kernel.org/r/20231207161211.2374093-11-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand Tested-by: Kefeng Wang Tested-by: John Hubbard Cc: Alistair Popple Cc: Anshuman Khandual Cc: Barry Song Cc: Catalin Marinas Cc: David Rientjes Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Itaru Kitayama Cc: Kirill A. Shutemov Cc: Luis Chamberlain Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yang Shi Cc: Yin Fengwei Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/cow.c | 82 ++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index a284918b1172..363bf5f801be 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -29,15 +29,49 @@ #include "../../../../mm/gup_test.h" #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" static size_t pagesize; static int pagemap_fd; static size_t pmdsize; +static int nr_thpsizes; +static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!pmdsize) + return 0; + + orders = 1UL << sz2ord(pmdsize); + orders |= thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + static void detect_huge_zeropage(void) { int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", @@ -1101,15 +1135,27 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (pmdsize) { - run_with_thp(test_case->fn, test_case->desc, pmdsize); - run_with_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_pte_mapped_thp(test_case->fn, test_case->desc, pmdsize); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_single_pte_of_thp(test_case->fn, test_case->desc, pmdsize); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, pmdsize); - run_with_partial_mremap_thp(test_case->fn, test_case->desc, pmdsize); - run_with_partial_shared_thp(test_case->fn, test_case->desc, pmdsize); + for (i = 0; i < nr_thpsizes; i++) { + size_t size = thpsizes[i]; + struct thp_settings settings = *thp_current_settings(); + + settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + thp_push_settings(&settings); + + if (size == pmdsize) { + run_with_thp(test_case->fn, test_case->desc, size); + run_with_thp_swap(test_case->fn, test_case->desc, size); + } + + run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); + run_with_partial_shared_thp(test_case->fn, test_case->desc, size); + + thp_pop_settings(); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1130,8 +1176,9 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; + tests += 6 * nr_thpsizes; if (pmdsize) - tests += 8; + tests += 2; return tests; } @@ -1689,16 +1736,22 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { int err; + struct thp_settings default_settings; ksft_print_header(); pagesize = getpagesize(); pmdsize = read_pmd_pagesize(); if (pmdsize) { + /* Only if THP is supported. */ + thp_read_settings(&default_settings); + default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + thp_save_settings(); + thp_push_settings(&default_settings); + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", pmdsize / 1024); - ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - pmdsize / 1024); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); @@ -1717,6 +1770,11 @@ int main(int argc, char **argv) run_anon_thp_test_cases(); run_non_anon_test_cases(); + if (pmdsize) { + /* Only if THP is supported. */ + thp_restore_settings(); + } + err = ksft_get_fail_cnt(); if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", -- cgit v1.2.3 From 03d69d49da496e31246f41a017b32b68b9d2362e Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 8 Dec 2023 10:04:50 +0800 Subject: maple_tree: fix warning comparing pointer to 0 Avoid pointer type value compared with 0 to make code clear. ./tools/testing/radix-tree/maple.c:34142:15-16: WARNING comparing pointer to 0. Link: https://lkml.kernel.org/r/20231208020450.7003-1-jiapeng.chong@linux.alibaba.com Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7696 Signed-off-by: Jiapeng Chong Cc: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- tools/testing/radix-tree/maple.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 35cc8c2a10f4..f1caf4bcf937 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -34139,7 +34139,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set27, ARRAY_SIZE(set27)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 140415537422336)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 140415537422336)); mt_set_non_kernel(0); mt_validate(mt); mtree_destroy(mt); @@ -34263,7 +34263,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set37, ARRAY_SIZE(set37)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); @@ -34271,7 +34271,7 @@ STORE, 140501948112896, 140501948116991, mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); check_erase2_testset(mt, set38, ARRAY_SIZE(set38)); rcu_barrier(); - MT_BUG_ON(mt, 0 != mtree_load(mt, 94637033459712)); + MT_BUG_ON(mt, NULL != mtree_load(mt, 94637033459712)); mt_validate(mt); mtree_destroy(mt); -- cgit v1.2.3 From 306abb63a8cab566bf80860c5430b1fa316646b7 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:06 +0000 Subject: selftests/damon: implement a python module for test-purpose DAMON sysfs controls Patch series "selftests/damon: add Python-written DAMON functionality tests", v2. DAMON exports most of its functionality via its sysfs interface. Hence most DAMON functionality tests could be implemented using the interface. However, because the interfaces require simple but multiple operations for many controls, writing all such tests from the scratch could be repetitive and time consuming. Implement a minimum DAMON sysfs control module, and a couple of DAMON functionality tests using the control module. The first test is for ensuring minimum accuracy of data access monitoring, and the second test is for finding if a previously found and fixed bug is introduced again. Note that the DAMON sysfs control module is only for avoiding duplicating code in tests. For convenient and general control of DAMON, users should use DAMON user-space tools that developed for the purpose, such as damo[1]. [1] https://github.com/damonitor/damo Patches Sequence ---------------- This patchset is constructed with five patches. The first three patches implement a Python-written test implementation-purpose DAMON sysfs control module. The implementation is incrementally done in the sequence of the basic data structure (first patch) first, kdamonds start command (second patch) next, and finally DAMOS tried bytes update command (third patch). Then two patches for implementing selftests using the module follows. The fourth patch implements a basic functionality test of DAMON for working set estimation accuracy. Finally, the fifth patch implements a corner case test for a previously found bug. This patch (of 5): Implement a python module for DAMON sysfs controls. The module is aimed to be useful for writing DAMON functionality tests in future. Nonetheless, this module is only representing a subset of DAMON sysfs files. Following commits will implement more DAMON sysfs controls. Link: https://lkml.kernel.org/r/20231212194810.54457-1-sj@kernel.org Link: https://lkml.kernel.org/r/20231212194810.54457-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tools/testing/selftests/damon/_damon_sysfs.py (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py new file mode 100644 index 000000000000..78101846ab66 --- /dev/null +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0 + +class DamosAccessPattern: + size = None + nr_accesses = None + age = None + scheme = None + + def __init__(self, size=None, nr_accesses=None, age=None): + self.size = size + self.nr_accesses = nr_accesses + self.age = age + + if self.size == None: + self.size = [0, 2**64 - 1] + if self.nr_accesses == None: + self.nr_accesses = [0, 2**64 - 1] + if self.age == None: + self.age = [0, 2**64 - 1] + +class Damos: + action = None + access_pattern = None + # todo: Support quotas, watermarks, stats, tried_regions + idx = None + context = None + + def __init__(self, action='stat', access_pattern=DamosAccessPattern()): + self.action = action + self.access_pattern = access_pattern + self.access_pattern.scheme = self + +class DamonTarget: + pid = None + # todo: Support target regions if test is made + idx = None + context = None + + def __init__(self, pid): + self.pid = pid + +class DamonAttrs: + sample_us = None + aggr_us = None + update_us = None + min_nr_regions = None + max_nr_regions = None + context = None + + def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + min_nr_regions=10, max_nr_regions=1000): + self.sample_us = sample_us + self.aggr_us = aggr_us + self.update_us = update_us + self.min_nr_regions = min_nr_regions + self.max_nr_regions = max_nr_regions + +class DamonCtx: + ops = None + monitoring_attrs = None + targets = None + schemes = None + kdamond = None + idx = None + + def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[], + schemes=[]): + self.ops = ops + self.monitoring_attrs = monitoring_attrs + self.monitoring_attrs.context = self + + self.targets = targets + for idx, target in enumerate(self.targets): + target.idx = idx + target.context = self + + self.schemes = schemes + for idx, scheme in enumerate(self.schemes): + scheme.idx = idx + scheme.context = self + +class Kdamond: + state = None + pid = None + contexts = None + idx = None # index of this kdamond between siblings + kdamonds = None # parent + + def __init__(self, contexts=[]): + self.contexts = contexts + for idx, context in enumerate(self.contexts): + context.idx = idx + context.kdamond = self + +class Kdamonds: + kdamonds = [] + + def __init__(self, kdamonds=[]): + self.kdamonds = kdamonds + for idx, kdamond in enumerate(self.kdamonds): + kdamond.idx = idx + kdamond.kdamonds = self -- cgit v1.2.3 From f5f0e5a2bef9e46f7a674b71d7f2a4c4b7e6bc5d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:07 +0000 Subject: selftests/damon/_damon_sysfs: implement kdamonds start function Extend the tests-writing-purpose DAMON sysfs control module to support the kdamonds start functionality. Link: https://lkml.kernel.org/r/20231212194810.54457-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 206 ++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 78101846ab66..6b99f87a5f1e 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -1,5 +1,28 @@ # SPDX-License-Identifier: GPL-2.0 +import os + +sysfs_root = '/sys/kernel/mm/damon/admin' + +def write_file(path, string): + "Returns error string if failed, or None otherwise" + string = '%s' % string + try: + with open(path, 'w') as f: + f.write(string) + except Exception as e: + return '%s' % e + return None + +def read_file(path): + '''Returns the read content and error string. The read content is None if + the reading failed''' + try: + with open(path, 'r') as f: + return f.read(), None + except Exception as e: + return None, '%s' % e + class DamosAccessPattern: size = None nr_accesses = None @@ -18,6 +41,35 @@ class DamosAccessPattern: if self.age == None: self.age = [0, 2**64 - 1] + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'), + self.nr_accesses[0]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'), + self.nr_accesses[1]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1]) + if err != None: + return err + class Damos: action = None access_pattern = None @@ -30,6 +82,39 @@ class Damos: self.access_pattern = access_pattern self.access_pattern.scheme = self + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'schemes', '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action) + if err != None: + return err + err = self.access_pattern.stage() + if err != None: + return err + + # disable quotas + err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0') + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0') + if err != None: + return err + + # disable watermarks + err = write_file( + os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + if err != None: + return err + + # disable filters + err = write_file( + os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + if err != None: + return err + class DamonTarget: pid = None # todo: Support target regions if test is made @@ -39,6 +124,18 @@ class DamonTarget: def __init__(self, pid): self.pid = pid + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'targets', '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0') + if err != None: + return err + return write_file( + os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) + class DamonAttrs: sample_us = None aggr_us = None @@ -55,6 +152,40 @@ class DamonAttrs: self.min_nr_regions = min_nr_regions self.max_nr_regions = max_nr_regions + def interval_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'intervals') + + def nr_regions_range_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'nr_regions') + + def stage(self): + err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'), + self.sample_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), + self.aggr_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), + self.update_us) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'min'), + self.min_nr_regions) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'max'), + self.max_nr_regions) + if err != None: + return err + class DamonCtx: ops = None monitoring_attrs = None @@ -79,6 +210,46 @@ class DamonCtx: scheme.idx = idx scheme.context = self + def sysfs_dir(self): + return os.path.join(self.kdamond.sysfs_dir(), 'contexts', + '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'operations'), self.ops) + if err != None: + return err + err = self.monitoring_attrs.stage() + if err != None: + return err + + nr_targets_file = os.path.join( + self.sysfs_dir(), 'targets', 'nr_targets') + content, err = read_file(nr_targets_file) + if err != None: + return err + if int(content) != len(self.targets): + err = write_file(nr_targets_file, '%d' % len(self.targets)) + if err != None: + return err + for target in self.targets: + err = target.stage() + if err != None: + return err + + nr_schemes_file = os.path.join( + self.sysfs_dir(), 'schemes', 'nr_schemes') + content, err = read_file(nr_schemes_file) + if int(content) != len(self.schemes): + err = write_file(nr_schemes_file, '%d' % len(self.schemes)) + if err != None: + return err + for scheme in self.schemes: + err = scheme.stage() + if err != None: + return err + return None + class Kdamond: state = None pid = None @@ -92,6 +263,27 @@ class Kdamond: context.idx = idx context.kdamond = self + def sysfs_dir(self): + return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx) + + def start(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err != None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err != None: + return err + + for context in self.contexts: + err = context.stage() + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + return err + class Kdamonds: kdamonds = [] @@ -100,3 +292,17 @@ class Kdamonds: for idx, kdamond in enumerate(self.kdamonds): kdamond.idx = idx kdamond.kdamonds = self + + def sysfs_dir(self): + return os.path.join(sysfs_root, 'kdamonds') + + def start(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'), + '%s' % len(self.kdamonds)) + if err != None: + return err + for kdamond in self.kdamonds: + err = kdamond.start() + if err != None: + return err + return None -- cgit v1.2.3 From 3402c6ce398e33bf1733f619756dd068ca2e2aa5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:08 +0000 Subject: selftests/damon/_damon_sysfs: implement updat_schemes_tried_bytes command Implement update_schemes_tried_bytes command of DAMON sysfs interface in _damon_sysfs.py. It is not only making the update, but also read the updated value from the sysfs interface and store it in the Kdamond python objects so that the user of the module can easily get the value. Link: https://lkml.kernel.org/r/20231212194810.54457-4-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/_damon_sysfs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 6b99f87a5f1e..e98cf4b6a4b7 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -76,6 +76,7 @@ class Damos: # todo: Support quotas, watermarks, stats, tried_regions idx = None context = None + tried_bytes = None def __init__(self, action='stat', access_pattern=DamosAccessPattern()): self.action = action @@ -284,6 +285,19 @@ class Kdamond: err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') return err + def update_schemes_tried_bytes(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_bytes') + if err != None: + return err + for context in self.contexts: + for scheme in context.schemes: + content, err = read_file(os.path.join(scheme.sysfs_dir(), + 'tried_regions', 'total_bytes')) + if err != None: + return err + scheme.tried_bytes = int(content) + class Kdamonds: kdamonds = [] -- cgit v1.2.3 From b5906f5f7359f561c5915dc146ced1bc2733401c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 12 Dec 2023 19:48:09 +0000 Subject: selftests/damon: add a test for update_schemes_tried_regions sysfs command Add a selftest for verifying the accuracy of DAMON's access monitoring functionality. The test starts a program of artificial access pattern, monitor the access pattern using DAMON, and check if DAMON finds expected amount of hot data region (working set size) with only acceptable error rate. Note that the acceptable error rate is set with only naive assumptions and small number of tests. Hence failures of the test may not always mean DAMON is broken. Rather than that, those could be a signal to better understand the real accuracy level of DAMON in wider environments. Based on further finding, we could optimize DAMON or adjust the expectation of the test. Link: https://lkml.kernel.org/r/20231212194810.54457-5-sj@kernel.org Signed-off-by: SeongJae Park Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 2 + tools/testing/selftests/damon/access_memory.c | 41 ++++++++++++++++ ..._update_schemes_tried_regions_wss_estimation.py | 55 ++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 tools/testing/selftests/damon/access_memory.c create mode 100644 tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py (limited to 'tools') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index b71247ba7196..90ffafc42c5e 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -2,6 +2,7 @@ # Makefile for damon selftests TEST_GEN_FILES += huge_count_read_write +TEST_GEN_FILES += access_memory TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh @@ -9,6 +10,7 @@ TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c new file mode 100644 index 000000000000..585a2fa54329 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for testing DAMON. + */ + +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 4) { + printf("Usage: %s