diff options
Diffstat (limited to 'tools/testing')
466 files changed, 22295 insertions, 4984 deletions
diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config index 54ad8972681a..28edf816aa70 100644 --- a/tools/testing/kunit/configs/arch_uml.config +++ b/tools/testing/kunit/configs/arch_uml.config @@ -1,8 +1,7 @@ # Config options which are added to UML builds by default -# Enable virtio/pci, as a lot of tests require it. -CONFIG_VIRTIO_UML=y -CONFIG_UML_PCI_OVER_VIRTIO=y +# Enable pci, as a lot of tests require it. +CONFIG_KUNIT_UML_PCI=y # Enable FORTIFY_SOURCE for wider checking. CONFIG_FORTIFY_SOURCE=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 7f9ae55fd6d5..cd99c1956331 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -228,7 +228,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input fake_test.counts.passed = 1 output: Iterable[str] = input_data - if request.raw_output == 'all': + if request.raw_output == 'all' or request.raw_output == 'full': pass elif request.raw_output == 'kunit': output = kunit_parser.extract_tap_lines(output) @@ -425,7 +425,7 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' 'By default, filters to just KUnit output. Use ' '--raw_output=all to show everything', - type=str, nargs='?', const='all', default=None, choices=['all', 'kunit']) + type=str, nargs='?', const='all', default=None, choices=['all', 'full', 'kunit']) parser.add_argument('--json', nargs='?', help='Prints parsed test results as JSON to stdout or a file if ' diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c176487356e6..333cd3a4a56b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -352,9 +352,9 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: lines.pop() return True -TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') +TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$') -TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') +TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$') def peek_test_name_match(lines: LineStream, test: Test) -> bool: """ @@ -379,6 +379,8 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: if not match: return False name = match.group(4) + if not name: + return False return name == test.name def parse_test_result(lines: LineStream, test: Test, @@ -416,7 +418,7 @@ def parse_test_result(lines: LineStream, test: Test, # Set name of test object if skip_match: - test.name = skip_match.group(4) + test.name = skip_match.group(4) or skip_match.group(5) else: test.name = match.group(4) diff --git a/tools/testing/kunit/qemu_configs/mips.py b/tools/testing/kunit/qemu_configs/mips.py new file mode 100644 index 000000000000..8899ac157b30 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_32BIT=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta']) diff --git a/tools/testing/kunit/qemu_configs/mips64.py b/tools/testing/kunit/qemu_configs/mips64.py new file mode 100644 index 000000000000..1478aed05b94 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips64.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_CPU_MIPS64_R2=y +CONFIG_64BIT=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips64', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta', '-cpu', '5KEc']) diff --git a/tools/testing/kunit/qemu_configs/mips64el.py b/tools/testing/kunit/qemu_configs/mips64el.py new file mode 100644 index 000000000000..300c711d7a82 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mips64el.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_CPU_MIPS64_R2=y +CONFIG_64BIT=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mips64el', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta', '-cpu', '5KEc']) diff --git a/tools/testing/kunit/qemu_configs/mipsel.py b/tools/testing/kunit/qemu_configs/mipsel.py new file mode 100644 index 000000000000..3d3543315b45 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/mipsel.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='mips', + kconfig=''' +CONFIG_32BIT=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_MALTA=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +''', + qemu_arch='mipsel', + kernel_path='vmlinuz', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'malta']) diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log index 65d3f27feaf2..30d9ef18bcec 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log @@ -1,5 +1,5 @@ TAP version 13 -1..2 +1..3 # selftests: membarrier: membarrier_test_single_thread # TAP version 13 # 1..2 @@ -12,3 +12,4 @@ ok 1 selftests: membarrier: membarrier_test_single_thread # ok 1 sys_membarrier available # ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected ok 2 selftests: membarrier: membarrier_test_multi_thread +ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 172700fb7784..5c1b18e3ed21 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -8,14 +8,6 @@ * difficult to handle in kernel tests. */ -#define CONFIG_DEBUG_MAPLE_TREE -#define CONFIG_MAPLE_SEARCH -#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31) -#include "test.h" -#include <stdlib.h> -#include <time.h> -#include <linux/init.h> - #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) @@ -23,7 +15,9 @@ #define MODULE_LICENSE(x) #define dump_stack() assert(0) -#include "../../../lib/maple_tree.c" +#include "test.h" + +#include "../shared/maple-shim.c" #include "../../../lib/test_maple_tree.c" #define RCU_RANGE_COUNT 1000 @@ -63,430 +57,6 @@ struct rcu_reader_struct { struct rcu_test_struct2 *test; }; -static int get_alloc_node_count(struct ma_state *mas) -{ - int count = 1; - struct maple_alloc *node = mas->alloc; - - if (!node || ((unsigned long)node & 0x1)) - return 0; - while (node->node_count) { - count += node->node_count; - node = node->slot[0]; - } - return count; -} - -static void check_mas_alloc_node_count(struct ma_state *mas) -{ - mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 1, GFP_KERNEL); - mas_node_count_gfp(mas, MAPLE_ALLOC_SLOTS + 3, GFP_KERNEL); - MT_BUG_ON(mas->tree, get_alloc_node_count(mas) != mas->alloc->total); - mas_destroy(mas); -} - -/* - * check_new_node() - Check the creation of new nodes and error path - * verification. - */ -static noinline void __init check_new_node(struct maple_tree *mt) -{ - - struct maple_node *mn, *mn2, *mn3; - struct maple_alloc *smn; - struct maple_node *nodes[100]; - int i, j, total; - - MA_STATE(mas, mt, 0, 0); - - check_mas_alloc_node_count(&mas); - - /* Try allocating 3 nodes */ - mtree_lock(mt); - mt_set_non_kernel(0); - /* request 3 nodes to be allocated. */ - mas_node_count(&mas, 3); - /* Allocation request of 3. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 3); - /* Allocate failed. */ - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - mas_push_node(&mas, mn); - mas_reset(&mas); - mas_destroy(&mas); - mtree_unlock(mt); - - - /* Try allocating 1 node, then 2 more */ - mtree_lock(mt); - /* Set allocation request to 1. */ - mas_set_alloc_req(&mas, 1); - /* Check Allocation request of 1. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - mas_set_err(&mas, -ENOMEM); - /* Validate allocation request. */ - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - /* Eat the requested node. */ - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mn->slot[0] != NULL); - MT_BUG_ON(mt, mn->slot[1] != NULL); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas.status = ma_start; - mas_destroy(&mas); - /* Allocate 3 nodes, will fail. */ - mas_node_count(&mas, 3); - /* Drop the lock and allocate 3 nodes. */ - mas_nomem(&mas, GFP_KERNEL); - /* Ensure 3 are allocated. */ - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - /* Allocation request of 0. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 0); - - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - MT_BUG_ON(mt, mas.alloc->slot[1] == NULL); - /* Ensure we counted 3. */ - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - /* Free. */ - mas_reset(&mas); - mas_destroy(&mas); - - /* Set allocation request to 1. */ - mas_set_alloc_req(&mas, 1); - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - mas_set_err(&mas, -ENOMEM); - /* Validate allocation request. */ - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != 1); - /* Check the node is only one node. */ - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, mn->slot[0] != NULL); - MT_BUG_ON(mt, mn->slot[1] != NULL); - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != 1); - MT_BUG_ON(mt, mas.alloc->node_count); - - mas_set_alloc_req(&mas, 2); /* request 2 more. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 2); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != 3); - MT_BUG_ON(mt, mas.alloc == NULL); - MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); - MT_BUG_ON(mt, mas.alloc->slot[1] == NULL); - for (i = 2; i >= 0; i--) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != i); - MT_BUG_ON(mt, !mn); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - - total = 64; - mas_set_alloc_req(&mas, total); /* request 2 more. */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != total); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (i = total; i > 0; i--) { - unsigned int e = 0; /* expected node_count */ - - if (!MAPLE_32BIT) { - if (i >= 35) - e = i - 34; - else if (i >= 5) - e = i - 4; - else if (i >= 2) - e = i - 1; - } else { - if (i >= 4) - e = i - 3; - else if (i >= 1) - e = i - 1; - else - e = 0; - } - - MT_BUG_ON(mt, mas.alloc->node_count != e); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != i - 1); - MT_BUG_ON(mt, !mn); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - - total = 100; - for (i = 1; i < total; i++) { - mas_set_alloc_req(&mas, i); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (j = i; j > 0; j--) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != j - 1); - MT_BUG_ON(mt, !mn); - MT_BUG_ON(mt, not_empty(mn)); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != j); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != j - 1); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - mas_set_alloc_req(&mas, i); - mas_set_err(&mas, -ENOMEM); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - for (j = 0; j <= i/2; j++) { - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - nodes[j] = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); - } - - while (j) { - j--; - mas_push_node(&mas, nodes[j]); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - } - MT_BUG_ON(mt, mas_allocated(&mas) != i); - for (j = 0; j <= i/2; j++) { - MT_BUG_ON(mt, mas_allocated(&mas) != i - j); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1); - } - mas_reset(&mas); - MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); - mas_destroy(&mas); - - } - - /* Set allocation request. */ - total = 500; - mas_node_count(&mas, total); - /* Drop the lock and allocate the nodes. */ - mas_nomem(&mas, GFP_KERNEL); - MT_BUG_ON(mt, !mas.alloc); - i = 1; - smn = mas.alloc; - while (i < total) { - for (j = 0; j < MAPLE_ALLOC_SLOTS; j++) { - i++; - MT_BUG_ON(mt, !smn->slot[j]); - if (i == total) - break; - } - smn = smn->slot[0]; /* next. */ - } - MT_BUG_ON(mt, mas_allocated(&mas) != total); - mas_reset(&mas); - mas_destroy(&mas); /* Free. */ - - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - for (i = 1; i < 128; i++) { - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */ - for (j = i; j > 0; j--) { /*Free the requests */ - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - } - - for (i = 1; i < MAPLE_NODE_MASK + 1; i++) { - MA_STATE(mas2, mt, 0, 0); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != i); /* check request filled */ - for (j = 1; j <= i; j++) { /* Move the allocations to mas2 */ - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mas_push_node(&mas2, mn); - MT_BUG_ON(mt, mas_allocated(&mas2) != j); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_allocated(&mas2) != i); - - for (j = i; j > 0; j--) { /*Free the requests */ - MT_BUG_ON(mt, mas_allocated(&mas2) != j); - mn = mas_pop_node(&mas2); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas2) != 0); - } - - - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - - mn = mas_pop_node(&mas); /* get the next node. */ - MT_BUG_ON(mt, mn == NULL); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS - 1); - - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - - /* Check the limit of pop/push/pop */ - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 2); /* Request */ - MT_BUG_ON(mt, mas_alloc_req(&mas) != 1); - MT_BUG_ON(mt, mas.node != MA_ERROR(-ENOMEM)); - MT_BUG_ON(mt, !mas_nomem(&mas, GFP_KERNEL)); - MT_BUG_ON(mt, mas_alloc_req(&mas)); - MT_BUG_ON(mt, mas.alloc->node_count != 1); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - MT_BUG_ON(mt, mas.alloc->node_count != MAPLE_ALLOC_SLOTS); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas.alloc->node_count != 1); - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) { - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, not_empty(mn)); - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - } - MT_BUG_ON(mt, mas_allocated(&mas) != 0); - - - for (i = 3; i < MAPLE_NODE_MASK * 3; i++) { - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put it back */ - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn2 = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put them back */ - mas_push_node(&mas, mn2); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn2 = mas_pop_node(&mas); /* get the next node. */ - mn3 = mas_pop_node(&mas); /* get the next node. */ - mas_push_node(&mas, mn); /* put them back */ - mas_push_node(&mas, mn2); - mas_push_node(&mas, mn3); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, i); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mn = mas_pop_node(&mas); /* get the next node. */ - mn->parent = ma_parent_ptr(mn); - ma_free_rcu(mn); - mas_destroy(&mas); - } - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 5); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != 5); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 10); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != 10); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS - 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS - 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, 10 + MAPLE_ALLOC_SLOTS - 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2); - mas_destroy(&mas); - - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1); - mas.node = MA_ERROR(-ENOMEM); - mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */ - mas_nomem(&mas, GFP_KERNEL); /* Fill request */ - mas.status = ma_start; - MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2); - mas_destroy(&mas); - - mtree_unlock(mt); -} - /* * Check erasing including RCU. */ @@ -35455,17 +35025,6 @@ static void check_dfs_preorder(struct maple_tree *mt) MT_BUG_ON(mt, count != e); mtree_destroy(mt); - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - mas_reset(&mas); - mt_zero_nr_tallocated(); - mt_set_non_kernel(200); - mas_expected_entries(&mas, max); - for (count = 0; count <= max; count++) { - mas.index = mas.last = count; - mas_store(&mas, xa_mk_value(count)); - MT_BUG_ON(mt, mas_is_err(&mas)); - } - mas_destroy(&mas); rcu_barrier(); /* * pr_info(" ->seq test of 0-%lu %luK in %d active (%d total)\n", @@ -35524,6 +35083,18 @@ static unsigned char get_vacant_height(struct ma_wr_state *wr_mas, void *entry) return vacant_height; } +static int mas_allocated(struct ma_state *mas) +{ + int total = 0; + + if (mas->alloc) + total++; + + if (mas->sheaf) + total += kmem_cache_sheaf_size(mas->sheaf); + + return total; +} /* Preallocation testing */ static noinline void __init check_prealloc(struct maple_tree *mt) { @@ -35542,7 +35113,10 @@ static noinline void __init check_prealloc(struct maple_tree *mt) /* Spanning store */ mas_set_range(&mas, 470, 500); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); + + mas_wr_preallocate(&wr_mas, ptr); + MT_BUG_ON(mt, mas.store_type != wr_spanning_store); + MT_BUG_ON(mt, mas_is_err(&mas)); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); vacant_height = get_vacant_height(&wr_mas, ptr); @@ -35552,6 +35126,7 @@ static noinline void __init check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); + mas_wr_preallocate(&wr_mas, ptr); MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); @@ -35597,20 +35172,6 @@ static noinline void __init check_prealloc(struct maple_tree *mt) height = mas_mt_height(&mas); vacant_height = get_vacant_height(&wr_mas, ptr); MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); - mn = mas_pop_node(&mas); - MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); - mas_push_node(&mas, mn); - MT_BUG_ON(mt, mas_allocated(&mas) != allocated); - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); - mas_destroy(&mas); - allocated = mas_allocated(&mas); - MT_BUG_ON(mt, allocated != 0); - - MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); - allocated = mas_allocated(&mas); - height = mas_mt_height(&mas); - vacant_height = get_vacant_height(&wr_mas, ptr); - MT_BUG_ON(mt, allocated != 1 + (height - vacant_height) * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); @@ -36327,13 +35888,18 @@ extern void test_kmem_cache_bulk(void); static inline void check_spanning_store_height(struct maple_tree *mt) { int index = 0; + int last = 140; MA_STATE(mas, mt, 0, 0); mas_lock(&mas); while (mt_height(mt) != 3) { mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); mas_set(&mas, ++index); } - mas_set_range(&mas, 90, 140); + + if (MAPLE_32BIT) + last = 155; /* 32 bit higher branching factor. */ + + mas_set_range(&mas, 90, last); mas_store_gfp(&mas, xa_mk_value(index), GFP_KERNEL); MT_BUG_ON(mt, mas_mt_height(&mas) != 2); mas_unlock(&mas); @@ -36406,11 +35972,17 @@ static void check_nomem_writer_race(struct maple_tree *mt) check_load(mt, 6, xa_mk_value(0xC)); mtree_unlock(mt); + mt_set_non_kernel(0); /* test for the same race but with mas_store_gfp() */ mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); mas_set_range(&mas, 0, 5); + + /* setup writer 2 that will trigger the race condition */ + mt_set_private(mt); + mt_set_callback(writer2); + mtree_lock(mt); mas_store_gfp(&mas, NULL, GFP_KERNEL); @@ -36428,6 +36000,7 @@ static void check_nomem_writer_race(struct maple_tree *mt) */ static inline int check_vma_modification(struct maple_tree *mt) { +#if defined(CONFIG_64BIT) MA_STATE(mas, mt, 0, 0); mtree_lock(mt); @@ -36451,30 +36024,11 @@ static inline int check_vma_modification(struct maple_tree *mt) mas_destroy(&mas); mtree_unlock(mt); +#endif + return 0; } -/* - * test to check that bulk stores do not use wr_rebalance as the store - * type. - */ -static inline void check_bulk_rebalance(struct maple_tree *mt) -{ - MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); - int max = 10; - - build_full_tree(mt, 0, 2); - - /* erase every entry in the tree */ - do { - /* set up bulk store mode */ - mas_expected_entries(&mas, max); - mas_erase(&mas); - MT_BUG_ON(mt, mas.store_type == wr_rebalance); - } while (mas_prev(&mas, 0) != NULL); - - mas_destroy(&mas); -} void farmer_tests(void) { @@ -36487,10 +36041,6 @@ void farmer_tests(void) check_vma_modification(&tree); mtree_destroy(&tree); - mt_init(&tree); - check_bulk_rebalance(&tree); - mtree_destroy(&tree); - tree.ma_root = xa_mk_value(0); mt_dump(&tree, mt_dump_dec); @@ -36550,10 +36100,6 @@ void farmer_tests(void) check_erase_testset(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, 0); - check_new_node(&tree); - mtree_destroy(&tree); - if (!MAPLE_32BIT) { mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_rcu_simulated(&tree); diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 5bd9e6e80625..121ae78d6e88 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -51,7 +51,6 @@ static inline unsigned long page_to_phys(struct page *page) #define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE) #define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE) -#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define __min(t1, t2, min1, min2, x, y) ({ \ t1 min1 = (x); \ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 030da61dbff3..a2d8e1093b00 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -314,7 +314,7 @@ gen_tar: install @echo "Created ${TAR_PATH}" clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 2a4b2662035e..e113dafa5c24 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -53,10 +53,10 @@ struct ctl_data { struct ctl_data *next; }; -int num_cards = 0; -int num_controls = 0; -struct card_data *card_list = NULL; -struct ctl_data *ctl_list = NULL; +int num_cards; +int num_controls; +struct card_data *card_list; +struct ctl_data *ctl_list; static void find_controls(void) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index dbd7c222ce93..ce92548670c8 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -30,7 +30,7 @@ struct card_data { struct card_data *next; }; -struct card_data *card_list = NULL; +struct card_data *card_list; struct pcm_data { snd_pcm_t *handle; @@ -43,10 +43,10 @@ struct pcm_data { struct pcm_data *next; }; -struct pcm_data *pcm_list = NULL; +struct pcm_data *pcm_list; -int num_missing = 0; -struct pcm_data *pcm_missing = NULL; +int num_missing; +struct pcm_data *pcm_missing; snd_config_t *default_pcm_config; diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 002ec38a8bbb..3b96d090c5eb 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -17,6 +17,8 @@ #include <asm/sigcontext.h> #include <asm/unistd.h> +#include <linux/auxvec.h> + #include "../../kselftest.h" #define TESTS_PER_HWCAP 3 @@ -55,7 +57,6 @@ static void cmpbr_sigill(void) /* Not implemented, too complicated and unreliable anyway */ } - static void crc32_sigill(void) { /* CRC32W W0, W0, W1 */ @@ -169,6 +170,18 @@ static void lse128_sigill(void) : "cc", "memory"); } +static void lsfe_sigill(void) +{ + float __attribute__ ((aligned (16))) mem; + register float *memp asm ("x0") = &mem; + + /* STFADD H0, [X0] */ + asm volatile(".inst 0x7c20801f" + : "+r" (memp) + : + : "memory"); +} + static void lut_sigill(void) { /* LUTI2 V0.16B, { V0.16B }, V[0] */ @@ -763,6 +776,13 @@ static const struct hwcap_data { .sigill_fn = lse128_sigill, }, { + .name = "LSFE", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_LSFE, + .cpuinfo = "lsfe", + .sigill_fn = lsfe_sigill, + }, + { .name = "LUT", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_LUT, diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index f58a9f89b952..1703543fb7c7 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -182,16 +182,16 @@ static int write_clone_read(void) } for (;;) { - waiting = wait4(ret, &status, __WCLONE, NULL); + waiting = waitpid(ret, &status, __WCLONE); if (waiting < 0) { if (errno == EINTR) continue; - ksft_print_msg("wait4() failed: %d\n", errno); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != ret) { - ksft_print_msg("wait4() returned wrong PID %d\n", + ksft_print_msg("waitpid() returned wrong PID %d\n", waiting); return 0; } @@ -227,10 +227,10 @@ int main(int argc, char **argv) ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { ksft_test_result(default_value(), "default_value\n"); - ksft_test_result(write_read, "write_read\n"); - ksft_test_result(write_sleep_read, "write_sleep_read\n"); - ksft_test_result(write_fork_read, "write_fork_read\n"); - ksft_test_result(write_clone_read, "write_clone_read\n"); + ksft_test_result(write_read(), "write_read\n"); + ksft_test_result(write_sleep_read(), "write_sleep_read\n"); + ksft_test_result(write_fork_read(), "write_fork_read\n"); + ksft_test_result(write_clone_read(), "write_clone_read\n"); } else { ksft_print_msg("SME support not present\n"); diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h index 04e7b72880ef..141cdcbf0b8f 100644 --- a/tools/testing/selftests/arm64/bti/assembler.h +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -14,7 +14,6 @@ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) - .macro startfn name:req .globl \name \name: diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index cdd7a45c045d..a85c19e9524e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1568,7 +1568,6 @@ static void run_sve_tests(void) &test_config); } } - } static void run_sme_tests(void) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 74e23208b94c..9349aa630c84 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -105,8 +105,8 @@ static void child_start(struct child_data *child, const char *program) /* * Read from the startup pipe, there should be no data - * and we should block until it is closed. We just - * carry on on error since this isn't super critical. + * and we should block until it is closed. We just + * carry-on on error since this isn't super critical. */ ret = read(3, &i, sizeof(i)); if (ret < 0) @@ -549,7 +549,7 @@ int main(int argc, char **argv) evs = calloc(tests, sizeof(*evs)); if (!evs) - ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + ksft_exit_fail_msg("Failed to allocate %d epoll events\n", tests); for (i = 0; i < cpus; i++) { diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index e3cec3723ffa..0c40007d1282 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -188,13 +188,13 @@ static bool create_socket(void) ref = malloc(digest_len); if (!ref) { - printf("Failed to allocated %d byte reference\n", digest_len); + printf("Failed to allocate %d byte reference\n", digest_len); return false; } digest = malloc(digest_len); if (!digest) { - printf("Failed to allocated %d byte digest\n", digest_len); + printf("Failed to allocate %d byte digest\n", digest_len); return false; } diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b22303778fb0..e0fc3a001e28 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -66,7 +66,7 @@ static const struct vec_type vec_types[] = { }; #define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4) -#define FLAG_TESTS 2 +#define FLAG_TESTS 4 #define FPSIMD_TESTS 2 #define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) @@ -95,19 +95,27 @@ static int do_child(void) static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) { struct iovec iov; + int ret; iov.iov_base = fpsimd; iov.iov_len = sizeof(*fpsimd); - return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); + ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_GETREGSET)"); + return ret; } static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) { struct iovec iov; + int ret; iov.iov_base = fpsimd; iov.iov_len = sizeof(*fpsimd); - return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); + ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_SETREGSET)"); + return ret; } static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, @@ -115,8 +123,9 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, { struct user_sve_header *sve; void *p; - size_t sz = sizeof *sve; + size_t sz = sizeof(*sve); struct iovec iov; + int ret; while (1) { if (*size < sz) { @@ -132,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, iov.iov_base = *buf; iov.iov_len = sz; - if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov)) + ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov); + if (ret) { + ksft_perror("ptrace(PTRACE_GETREGSET)"); goto error; + } sve = *buf; if (sve->size <= sz) @@ -152,10 +164,46 @@ static int set_sve(pid_t pid, const struct vec_type *type, const struct user_sve_header *sve) { struct iovec iov; + int ret; iov.iov_base = (void *)sve; iov.iov_len = sve->size; - return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); + ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_SETREGSET)"); + return ret; +} + +/* A read operation fails */ +static void read_fails(pid_t child, const struct vec_type *type) +{ + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + void *ret; + + ret = get_sve(child, type, (void **)&new_sve, &new_sve_size); + + ksft_test_result(ret == NULL, "%s unsupported read fails\n", + type->name); + + free(new_sve); +} + +/* A write operation fails */ +static void write_fails(pid_t child, const struct vec_type *type) +{ + struct user_sve_header sve; + int ret; + + /* Just the header, no data */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; + sve.vl = SVE_VL_MIN; + ret = set_sve(child, type, &sve); + + ksft_test_result(ret != 0, "%s unsupported write fails\n", + type->name); } /* Validate setting and getting the inherit flag */ @@ -270,6 +318,25 @@ static void check_u32(unsigned int vl, const char *reg, } } +/* Set out of range VLs */ +static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type) +{ + struct user_sve_header sve; + int ret; + + memset(&sve, 0, sizeof(sve)); + sve.flags = SVE_PT_REGS_SVE; + sve.size = sizeof(sve); + + ret = set_sve(child, type, &sve); + ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name); + + sve.vl = SVE_VL_MAX + SVE_VQ_BYTES; + ret = set_sve(child, type, &sve); + ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name, + SVE_VL_MAX + SVE_VQ_BYTES); +} + /* Access the FPSIMD registers via the SVE regset */ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) { @@ -683,6 +750,20 @@ static int do_parent(pid_t child) } for (i = 0; i < ARRAY_SIZE(vec_types); i++) { + /* + * If the vector type isn't supported reads and writes + * should fail. + */ + if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) { + read_fails(child, &vec_types[i]); + write_fails(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s unsupported read fails\n", + vec_types[i].name); + ksft_test_result_skip("%s unsupported write fails\n", + vec_types[i].name); + } + /* FPSIMD via SVE regset */ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { ptrace_sve_fpsimd(child, &vec_types[i]); @@ -703,6 +784,17 @@ static int do_parent(pid_t child) vec_types[i].name); } + /* Setting out of bounds VLs should fail */ + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_vl_ranges(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s Set invalid VL 0\n", + vec_types[i].name); + ksft_test_result_skip("%s Set invalid VL %d\n", + vec_types[i].name, + SVE_VL_MAX + SVE_VQ_BYTES); + } + /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index ea9c7d47790f..2d75d342eeb9 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -690,7 +690,6 @@ static inline void smstop(void) asm volatile("msr S0_3_C4_C6_3, xzr"); } - /* * Verify we can change the SVE vector length while SME is active and * continue to use SME afterwards. diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c index 584b8d59b7ea..a7f34040fbf1 100644 --- a/tools/testing/selftests/arm64/fp/zt-ptrace.c +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -108,7 +108,6 @@ static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES]) return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov); } - static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES]) { struct iovec iov; diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile index d2f3497a9103..1fbbf0ca1f02 100644 --- a/tools/testing/selftests/arm64/gcs/Makefile +++ b/tools/testing/selftests/arm64/gcs/Makefile @@ -14,11 +14,11 @@ LDLIBS+=-lpthread include ../../lib.mk $(OUTPUT)/basic-gcs: basic-gcs.c - $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - -static -include ../../../../include/nolibc/nolibc.h \ + $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \ + -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \ -I../../../../../usr/include \ -std=gnu99 -I../.. -g \ - -ffreestanding -Wall $^ -o $@ -lgcc + -ffreestanding $^ -o $@ -lgcc $(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 54f9c888249d..250977abc398 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -10,6 +10,7 @@ #include <sys/mman.h> #include <asm/mman.h> +#include <asm/hwcap.h> #include <linux/sched.h> #include "kselftest.h" @@ -386,14 +387,13 @@ int main(void) ksft_print_header(); - /* - * We don't have getauxval() with nolibc so treat a failure to - * read GCS state as a lack of support and skip. - */ + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode, 0, 0, 0); if (ret != 0) - ksft_exit_skip("Failed to read GCS state: %d\n", ret); + ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret); if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { gcs_mode = PR_SHADOW_STACK_ENABLE; @@ -410,7 +410,7 @@ int main(void) } /* One last test: disable GCS, we can do this one time */ - my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); if (ret != 0) ksft_print_msg("Failed to disable GCS: %d\n", ret); diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c index 989f75a491b7..1e6abb136ffd 100644 --- a/tools/testing/selftests/arm64/gcs/gcs-locking.c +++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c @@ -165,7 +165,6 @@ TEST_F(valid_modes, lock_enable_disable_others) ASSERT_EQ(ret, 0); ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); - ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, variant->mode); ASSERT_EQ(ret, 0); diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c index bbc7f4950c13..cf316d78ea97 100644 --- a/tools/testing/selftests/arm64/gcs/gcs-stress.c +++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c @@ -433,7 +433,7 @@ int main(int argc, char **argv) evs = calloc(tests, sizeof(*evs)); if (!evs) - ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + ksft_exit_fail_msg("Failed to allocate %d epoll events\n", tests); for (i = 0; i < gcs_threads; i++) diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c index 4435600ca400..e597861b26d6 100644 --- a/tools/testing/selftests/arm64/pauth/exec_target.c +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -13,7 +13,12 @@ int main(void) unsigned long hwcaps; size_t val; - fread(&val, sizeof(size_t), 1, stdin); + size_t size = fread(&val, sizeof(size_t), 1, stdin); + + if (size != 1) { + fprintf(stderr, "Could not read input from stdin\n"); + return EXIT_FAILURE; + } /* don't try to execute illegal (unimplemented) instructions) caller * should have checked this and keep worker simple diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3d8378972d26..be1ee7ba7ce0 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -44,3 +44,4 @@ xdp_redirect_multi xdp_synproxy xdp_hw_metadata xdp_features +verification_cert.h diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 3ebd77206f98..a17baf8c6fd7 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -2,4 +2,3 @@ # Alphabetical order get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -verifier_iterating_callbacks diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4863106034df..f00587d4ede6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \ test_bpftool.py TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) # Compile but not part of 'make run_tests' @@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \ xdping \ xskxceiver -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi +TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi ifneq ($(V),1) submake_extras := feature_display=0 @@ -398,7 +398,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif -# vmlinux.h is first dumped to a temprorary file and then compared to +# vmlinux.h is first dumped to a temporary file and then compared to # the previous version. This helps to avoid unnecessary re-builds of # $(TRUNNER_BPF_OBJS) $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) @@ -496,15 +496,16 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ - trace_printk.c trace_vprintk.c map_ptr_kern.c \ +LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c +LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ kfunc_call_test_subprog.c -SKEL_BLACKLIST += $$(LSKELS) +SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED) test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o @@ -535,6 +536,7 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \ # $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER +LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT) TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 TRUNNER_BINARY := $1$(if $2,-)$2 TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \ @@ -550,6 +552,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(TRUNNER_BPF_SRCS))) TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) +TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) # Evaluate rules now with extra TRUNNER_XXX variables above already defined @@ -604,6 +607,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) +$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) + $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ + $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/% # .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites @@ -653,6 +665,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_SKELS) \ $(TRUNNER_BPF_LSKELS) \ + $(TRUNNER_BPF_LSKELS_SIGNED) \ $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) @@ -667,6 +680,7 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ $(TRUNNER_EXTRA_HDRS) \ + $(VERIFY_SIG_HDR) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) @@ -697,6 +711,18 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ endef +VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh +VERIFY_SIG_HDR := verification_cert.h +VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der +PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem + +$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP) + $(Q)mkdir -p $(BUILD_DIR) + $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR) + +$(VERIFY_SIG_HDR): $(VERIFICATION_CERT) + $(Q)xxd -i -n test_progs_verification_cert $< > $@ + # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs @@ -716,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ disasm.c \ disasm_helpers.c \ json_writer.c \ + $(VERIFY_SIG_HDR) \ flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ @@ -725,7 +752,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/uprobe_multi \ $(TEST_KMOD_TARGETS) \ ima_setup.sh \ - verify_sig_setup.sh \ + $(VERIFY_SIG_SETUP) \ $(wildcard progs/btf_dump_test_case_*.c) \ $(wildcard progs/*.bpf.o) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -816,6 +843,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h +$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -837,6 +865,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_htab_mem.o \ $(OUTPUT)/bench_bpf_crypto.o \ $(OUTPUT)/bench_sockmap.o \ + $(OUTPUT)/bench_lpm_trie_map.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index ddd73d06a1eb..bd29bb2e6cb5 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp; extern struct argp bench_trigger_batch_argp; extern struct argp bench_crypto_argp; extern struct argp bench_sockmap_argp; +extern struct argp bench_lpm_trie_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = { { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 }, { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 }, + { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 }, {}, }; @@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -/* pure counting benchmarks to establish theoretical lmits */ +/* pure counting benchmarks to establish theoretical limits */ extern const struct bench bench_trig_usermode_count; extern const struct bench bench_trig_syscall_count; extern const struct bench bench_trig_kernel_count; @@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe; extern const struct bench bench_trig_kprobe_multi; extern const struct bench bench_trig_kretprobe_multi; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_kprobe_multi_all; +extern const struct bench bench_trig_kretprobe_multi_all; extern const struct bench bench_trig_fexit; extern const struct bench bench_trig_fmodret; extern const struct bench bench_trig_tp; @@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem; extern const struct bench bench_crypto_encrypt; extern const struct bench bench_crypto_decrypt; extern const struct bench bench_sockmap; +extern const struct bench bench_lpm_trie_noop; +extern const struct bench bench_lpm_trie_baseline; +extern const struct bench bench_lpm_trie_lookup; +extern const struct bench bench_lpm_trie_insert; +extern const struct bench bench_lpm_trie_update; +extern const struct bench bench_lpm_trie_delete; +extern const struct bench bench_lpm_trie_free; static const struct bench *benchs[] = { &bench_count_global, @@ -578,6 +589,8 @@ static const struct bench *benchs[] = { &bench_trig_kprobe_multi, &bench_trig_kretprobe_multi, &bench_trig_fentry, + &bench_trig_kprobe_multi_all, + &bench_trig_kretprobe_multi_all, &bench_trig_fexit, &bench_trig_fmodret, &bench_trig_tp, @@ -625,6 +638,13 @@ static const struct bench *benchs[] = { &bench_crypto_encrypt, &bench_crypto_decrypt, &bench_sockmap, + &bench_lpm_trie_noop, + &bench_lpm_trie_baseline, + &bench_lpm_trie_lookup, + &bench_lpm_trie_insert, + &bench_lpm_trie_update, + &bench_lpm_trie_delete, + &bench_lpm_trie_free, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 005c401b3e22..bea323820ffb 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -46,6 +46,7 @@ struct bench_res { unsigned long gp_ns; unsigned long gp_ct; unsigned int stime; + unsigned long duration_ns; }; struct bench { diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c new file mode 100644 index 000000000000..246f6cb3387d --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Cloudflare */ + +/* + * All of these benchmarks operate on tries with keys in the range + * [0, args.nr_entries), i.e. there are no gaps or partially filled + * branches of the trie for any key < args.nr_entries. + * + * This gives an idea of worst-case behaviour. + */ + +#include <argp.h> +#include <linux/time64.h> +#include <linux/if_ether.h> +#include "lpm_trie_bench.skel.h" +#include "lpm_trie_map.skel.h" +#include "bench.h" +#include "testing_helpers.h" +#include "progs/lpm_trie.h" + +static struct ctx { + struct lpm_trie_bench *bench; +} ctx; + +static struct { + __u32 nr_entries; + __u32 prefixlen; + bool random; +} args = { + .nr_entries = 0, + .prefixlen = 32, + .random = false, +}; + +enum { + ARG_NR_ENTRIES = 9000, + ARG_PREFIX_LEN, + ARG_RANDOM, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Number of unique entries in the LPM trie" }, + { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0, + "Number of prefix bits to use in the LPM trie" }, + { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" }, + {}, +}; + +static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_PREFIX_LEN: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid prefix_len value."); + argp_usage(state); + } + args.prefixlen = ret; + break; + case ARG_RANDOM: + args.random = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +const struct argp bench_lpm_trie_map_argp = { + .options = opts, + .parser = lpm_parse_arg, +}; + +static void validate_common(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer\n"); + exit(1); + } + + if (args.nr_entries == 0) { + fprintf(stderr, "Missing --nr_entries parameter\n"); + exit(1); + } + + if ((1UL << args.prefixlen) < args.nr_entries) { + fprintf(stderr, "prefix_len value too small for nr_entries\n"); + exit(1); + } +} + +static void lpm_insert_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-insert requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-insert does not support --random\n"); + exit(1); + } +} + +static void lpm_delete_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-delete requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-delete does not support --random\n"); + exit(1); + } +} + +static void lpm_free_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-free requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-free does not support --random\n"); + exit(1); + } +} + +static struct trie_key *keys; +static __u32 *vals; + +static void fill_map(int map_fd) +{ + int err; + + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts); + if (err) { + fprintf(stderr, "failed to batch update keys to map: %d\n", + -err); + exit(1); + } +} + +static void empty_map(int map_fd) +{ + int err; + + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts); + if (err) { + fprintf(stderr, "failed to batch delete keys for map: %d\n", + -err); + exit(1); + } +} + +static void attach_prog(void) +{ + int i; + + ctx.bench = lpm_trie_bench__open_and_load(); + if (!ctx.bench) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.bench->bss->nr_entries = args.nr_entries; + ctx.bench->bss->prefixlen = args.prefixlen; + ctx.bench->bss->random = args.random; + + if (lpm_trie_bench__attach(ctx.bench)) { + fprintf(stderr, "failed to attach skeleton\n"); + exit(1); + } + + keys = calloc(args.nr_entries, sizeof(*keys)); + vals = calloc(args.nr_entries, sizeof(*vals)); + + for (i = 0; i < args.nr_entries; i++) { + struct trie_key *k = &keys[i]; + __u32 *v = &vals[i]; + + k->prefixlen = args.prefixlen; + k->data = i; + *v = 1; + } +} + +static void attach_prog_and_fill_map(void) +{ + int fd; + + attach_prog(); + + fd = bpf_map__fd(ctx.bench->maps.trie_map); + fill_map(fd); +} + +static void lpm_noop_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_NOOP; +} + +static void lpm_baseline_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_BASELINE; +} + +static void lpm_lookup_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_LOOKUP; +} + +static void lpm_insert_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_INSERT; +} + +static void lpm_update_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_UPDATE; +} + +static void lpm_delete_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_DELETE; +} + +static void lpm_free_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_FREE; +} + +static void lpm_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.bench->bss->hits, 0); + res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0); +} + +static void bench_reinit_map(void) +{ + int fd = bpf_map__fd(ctx.bench->maps.trie_map); + + switch (ctx.bench->bss->op) { + case LPM_OP_INSERT: + /* trie_map needs to be emptied */ + empty_map(fd); + break; + case LPM_OP_DELETE: + /* trie_map needs to be refilled */ + fill_map(fd); + break; + default: + fprintf(stderr, "Unexpected REINIT return code for op %d\n", + ctx.bench->bss->op); + exit(1); + } +} + +/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */ +static void *lpm_producer(void *unused __always_unused) +{ + int err; + char in[ETH_HLEN]; /* unused */ + + LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in, + .data_size_in = sizeof(in), .repeat = 1, ); + + while (true) { + int fd = bpf_program__fd(ctx.bench->progs.run_bench); + err = bpf_prog_test_run_opts(fd, &opts); + if (err) { + fprintf(stderr, "failed to run BPF prog: %d\n", err); + exit(1); + } + + /* Check for kernel error code */ + if ((int)opts.retval < 0) { + fprintf(stderr, "BPF prog returned error: %d\n", + opts.retval); + exit(1); + } + + switch (opts.retval) { + case LPM_BENCH_SUCCESS: + break; + case LPM_BENCH_REINIT_MAP: + bench_reinit_map(); + break; + default: + fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n", + opts.retval, ctx.bench->bss->op); + exit(1); + } + } + + return NULL; +} + +static void *lpm_free_producer(void *unused __always_unused) +{ + while (true) { + struct lpm_trie_map *skel; + + skel = lpm_trie_map__open_and_load(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + fill_map(bpf_map__fd(skel->maps.trie_free_map)); + lpm_trie_map__destroy(skel); + } + + return NULL; +} + +/* + * The standard bench op_report_*() functions assume measurements are + * taken over a 1-second interval but operations that modify the map + * (INSERT, DELETE, and FREE) cannot run indefinitely without + * "resetting" the map to the initial state. Depending on the size of + * the map, this likely needs to happen before the 1-second timer fires. + * + * Calculate the fraction of a second over which the op measurement was + * taken (to ignore any time spent doing the reset) and report the + * throughput results per second. + */ +static void frac_second_report_progress(int iter, struct bench_res *res, + long delta_ns, double rate_divisor, + char rate) +{ + double hits_per_sec, hits_per_prod; + + hits_per_sec = res->hits / rate_divisor / + (res->duration_ns / (double)NSEC_PER_SEC); + hits_per_prod = hits_per_sec / env.producer_cnt; + + printf("Iter %3d (%7.3lfus): ", iter, + (delta_ns - NSEC_PER_SEC) / 1000.0); + printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate, + hits_per_prod, rate); +} + +static void frac_second_report_final(struct bench_res res[], int res_cnt, + double lat_divisor, double rate_divisor, + char rate, const char *unit) +{ + double hits_mean = 0.0, hits_stddev = 0.0; + double latency = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + double val = res[i].hits / rate_divisor / + (res[i].duration_ns / (double)NSEC_PER_SEC); + hits_mean += val / (0.0 + res_cnt); + latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + double val = + res[i].hits / rate_divisor / + (res[i].duration_ns / (double)NSEC_PER_SEC); + hits_stddev += (hits_mean - val) * (hits_mean - val) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + } + printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ", + hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt, + rate); + printf("latency %8.3lf %s/op\n", + latency / lat_divisor / env.producer_cnt, unit); +} + +static void insert_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000000.0; + char rate = 'M'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void delete_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000000.0; + char rate = 'M'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void free_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000.0; + char rate = 'K'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void insert_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1.0; + double rate_divisor = 1000000.0; + const char *unit = "ns"; + char rate = 'M'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +static void delete_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1.0; + double rate_divisor = 1000000.0; + const char *unit = "ns"; + char rate = 'M'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +static void free_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1000000.0; + double rate_divisor = 1000.0; + const char *unit = "ms"; + char rate = 'K'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +/* noop bench measures harness-overhead */ +const struct bench bench_lpm_trie_noop = { + .name = "lpm-trie-noop", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_noop_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* baseline overhead for lookup and update */ +const struct bench bench_lpm_trie_baseline = { + .name = "lpm-trie-baseline", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_baseline_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of doing a lookup on existing entries in a full trie */ +const struct bench bench_lpm_trie_lookup = { + .name = "lpm-trie-lookup", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_lookup_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of inserting new entries into an empty trie */ +const struct bench bench_lpm_trie_insert = { + .name = "lpm-trie-insert", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_insert_validate, + .setup = lpm_insert_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = insert_ops_report_progress, + .report_final = insert_ops_report_final, +}; + +/* measure cost of updating existing entries in a full trie */ +const struct bench bench_lpm_trie_update = { + .name = "lpm-trie-update", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_update_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of deleting existing entries from a full trie */ +const struct bench bench_lpm_trie_delete = { + .name = "lpm-trie-delete", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_delete_validate, + .setup = lpm_delete_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = delete_ops_report_progress, + .report_final = delete_ops_report_final, +}; + +/* measure cost of freeing a full trie */ +const struct bench bench_lpm_trie_free = { + .name = "lpm-trie-free", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_free_validate, + .setup = lpm_free_setup, + .producer_thread = lpm_free_producer, + .measure = lpm_measure, + .report_progress = free_ops_report_progress, + .report_final = free_ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c index 8ebf563a67a2..cfc072aa7fff 100644 --- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c +++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c @@ -10,6 +10,7 @@ #include <argp.h> #include "bench.h" #include "bench_sockmap_prog.skel.h" +#include "bpf_util.h" #define FILE_SIZE (128 * 1024) #define DATA_REPEAT_SIZE 10 @@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void) { int i; - for (i = 0; i < sizeof(ctx.fds); i++) { - if (ctx.fds[0] > 0) + for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) { + if (ctx.fds[i] > 0) close(ctx.fds[i]); } diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 82327657846e..1e2aff007c2a 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -226,6 +226,65 @@ static void trigger_fentry_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + char **syms = NULL; + size_t cnt = 0; + + /* Some recursive functions will be skipped in + * bpf_get_ksyms -> skip_entry, as they can introduce sufficient + * overhead. However, it's difficut to skip all the recursive + * functions for a debug kernel. + * + * So, don't run the kprobe-multi-all and kretprobe-multi-all on + * a debug kernel. + */ + if (bpf_get_ksyms(&syms, &cnt, true)) { + fprintf(stderr, "failed to get ksyms\n"); + exit(1); + } + + opts.syms = (const char **) syms; + opts.cnt = cnt; + opts.retprobe = kretprobe; + /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ + if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) { + fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); + exit(1); + } +} + +static void trigger_kprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, false); + attach_bpf(prog); +} + +static void trigger_kretprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kretprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kretprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, true); + attach_bpf(prog); +} + static void trigger_fexit_setup(void) { setup_ctx(); @@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all"); +BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all"); BENCH_TRIG_KERNEL(fexit, "fexit"); BENCH_TRIG_KERNEL(fmodret, "fmodret"); BENCH_TRIG_KERNEL(tp, "tp"); diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh index a690f5a68b6b..f7573708a0c3 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -6,8 +6,8 @@ def_tests=( \ usermode-count kernel-count syscall-count \ fentry fexit fmodret \ rawtp tp \ - kprobe kprobe-multi \ - kretprobe kretprobe-multi \ + kprobe kprobe-multi kprobe-multi-all \ + kretprobe kretprobe-multi kretprobe-multi-all \ ) tests=("$@") diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index da7e230f2781..d89eda3fd8a3 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, struct bpf_dynptr *value_p) __weak __ksym; +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define HARDIRQ_BITS 4 +#define NMI_BITS 4 + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +extern bool CONFIG_PREEMPT_RT __kconfig __weak; +#ifdef bpf_target_x86 +extern const int __preempt_count __ksym; +#endif + +struct task_struct___preempt_rt { + int softirq_disable_cnt; +} __attribute__((preserve_access_index)); + +static inline int get_preempt_count(void) +{ +#if defined(bpf_target_x86) + return *(int *) bpf_this_cpu_ptr(&__preempt_count); +#elif defined(bpf_target_arm64) + return bpf_get_current_task_btf()->thread_info.preempt.count; +#endif + return 0; +} + +/* Description + * Report whether it is in interrupt context. Only works on the following archs: + * * x86 + * * arm64 + */ +static inline int bpf_in_interrupt(void) +{ + struct task_struct___preempt_rt *tsk; + int pcnt; + + pcnt = get_preempt_count(); + if (!CONFIG_PREEMPT_RT) + return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK); + + tsk = (void *) bpf_get_current_task_btf(); + return (pcnt & (NMI_MASK | HARDIRQ_MASK)) | + (tsk->softirq_disable_cnt & SOFTIRQ_MASK); +} + #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 9386dfe8b884..794d44d19c88 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym __weak; + /* Description * Obtain a read-only pointer to the dynptr's data * Returns diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 5f6963a320d7..4bc2d25f33e1 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) #define sys_gettid() syscall(SYS_gettid) #endif +/* and poison usage to ensure it does not creep back in. */ +#pragma GCC poison gettid + #ifndef ENOTSUPP #define ENOTSUPP 524 #endif diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 15f626014872..20cede4db3ce 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path) log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); } +/* + * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID + * @relative_path: The cgroup path, relative to the workdir, to remove + * @pid: PID to be used to find cgroup_path + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup_pid(const char *relative_path, int pid) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path_pid(cgroup_path, relative_path, pid); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 182e1ac36c95..3857304be874 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); +void remove_cgroup_pid(const char *relative_path, int pid); unsigned long long get_cgroup_id(const char *relative_path); int get_cgroup1_hierarchy_id(const char *subsys_name); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 8916ab814a3e..70b28c1e653e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y CONFIG_NET_ACT_GACT=y +CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index e1495a4bbc99..7efad36ceb26 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -31,10 +31,7 @@ CONFIG_COMPAT=y CONFIG_CPUSETS=y CONFIG_CRASH_DUMP=y CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y -CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y @@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y CONFIG_DRM=y -CONFIG_DUMMY=y CONFIG_EXPERT=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -70,13 +66,11 @@ CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_IKHEADERS=y CONFIG_INET6_ESP=y -CONFIG_INET_ESP=y CONFIG_INET=y CONFIG_INPUT_EVDEV=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPVLAN=y CONFIG_JUMP_LABEL=y @@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET_ACT_BPF=y -CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NET_KEY=y -CONFIG_NET_SCH_FQ=y CONFIG_NET_VRF=y CONFIG_NET=y -CONFIG_NF_TABLES=y CONFIG_NLMON=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_OVERLAY_FS=y CONFIG_PACKET_DIAG=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI_HOST_GENERIC=y @@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y CONFIG_TCG_TIS=y CONFIG_TCG_TPM=y CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_DCTCP=y CONFIG_TLS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS=y @@ -161,6 +150,5 @@ CONFIG_UPROBES=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el index 9acf389dc4ce..b53afb5e0b71 100644 --- a/tools/testing/selftests/bpf/config.ppc64el +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -54,7 +54,6 @@ CONFIG_NET=y CONFIG_NO_HZ_IDLE=y CONFIG_NONPORTABLE=y CONFIG_NR_CPUS=256 -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI_HOST_GENERIC=y diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64 index bb7043a80e1a..7bee24a79a71 100644 --- a/tools/testing/selftests/bpf/config.riscv64 +++ b/tools/testing/selftests/bpf/config.riscv64 @@ -48,7 +48,6 @@ CONFIG_NET_VRF=y CONFIG_NONPORTABLE=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 26c3bc2ce11d..db61878148e4 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_CPUSETS=y CONFIG_CRASH_DUMP=y CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y -CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y CONFIG_IKHEADERS=y CONFIG_INET6_ESP=y CONFIG_INET=y -CONFIG_INET_ESP=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPVLAN=y CONFIG_JUMP_LABEL=y @@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_ACT_BPF=y -CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y -CONFIG_NET_SCH_FQ=y CONFIG_NET_VRF=y CONFIG_NETDEVICES=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_TARGET_MARK=y -CONFIG_NF_TABLES=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y @@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_TASK_XACCT=y CONFIG_TASKSTATS=y CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_DCTCP=y CONFIG_TLS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -131,6 +121,5 @@ CONFIG_UPROBES=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5e713ef7caa3..42ad817b00ae 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -44,7 +44,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y @@ -104,12 +103,10 @@ CONFIG_HZ_1000=y CONFIG_INET=y CONFIG_INPUT_EVDEV=y CONFIG_INTEL_POWERCLAMP=y -CONFIG_IP6_NF_IPTABLES=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_IP_ROUTE_MULTIPATH=y @@ -162,7 +159,6 @@ CONFIG_NUMA=y CONFIG_NUMA_BALANCING=y CONFIG_NVMEM=y CONFIG_OSF_PARTITION=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y @@ -220,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_CPUID=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 72b5c174ab3b..cdf7b6641444 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -457,7 +457,7 @@ int append_tid(char *str, size_t sz) if (end + 8 > sz) return -1; - sprintf(&str[end], "%07d", gettid()); + sprintf(&str[end], "%07ld", sys_gettid()); str[end + 7] = '\0'; return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 1d53a8561ee2..24c509ce4e5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "2"}, - {1, "R3_w", "4"}, - {2, "R3_w", "8"}, - {3, "R3_w", "16"}, - {4, "R3_w", "32"}, + {0, "R3", "2"}, + {1, "R3", "4"}, + {2, "R3", "8"}, + {3, "R3", "16"}, + {4, "R3", "32"}, }, }, { @@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "1"}, - {1, "R3_w", "2"}, - {2, "R3_w", "4"}, - {3, "R3_w", "8"}, - {4, "R3_w", "16"}, - {5, "R3_w", "1"}, - {6, "R4_w", "32"}, - {7, "R4_w", "16"}, - {8, "R4_w", "8"}, - {9, "R4_w", "4"}, - {10, "R4_w", "2"}, + {0, "R3", "1"}, + {1, "R3", "2"}, + {2, "R3", "4"}, + {3, "R3", "8"}, + {4, "R3", "16"}, + {5, "R3", "1"}, + {6, "R4", "32"}, + {7, "R4", "16"}, + {8, "R4", "8"}, + {9, "R4", "4"}, + {10, "R4", "2"}, }, }, { @@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "4"}, - {1, "R3_w", "8"}, - {2, "R3_w", "10"}, - {3, "R4_w", "8"}, - {4, "R4_w", "12"}, - {5, "R4_w", "14"}, + {0, "R3", "4"}, + {1, "R3", "8"}, + {2, "R3", "10"}, + {3, "R4", "8"}, + {4, "R4", "12"}, + {5, "R4", "14"}, }, }, { @@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "7"}, - {1, "R3_w", "7"}, - {2, "R3_w", "14"}, - {3, "R3_w", "56"}, + {0, "R3", "7"}, + {1, "R3", "7"}, + {2, "R3", "14"}, + {3, "R3", "56"}, }, }, @@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8)"}, - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R3_w", "var_off=(0x0; 0x1fe)"}, - {8, "R3_w", "var_off=(0x0; 0x3fc)"}, - {9, "R3_w", "var_off=(0x0; 0x7f8)"}, - {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end()"}, - {17, "R4_w", "var_off=(0x0; 0xff)"}, - {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, - {19, "R4_w", "var_off=(0x0; 0xff0)"}, - {20, "R4_w", "var_off=(0x0; 0x7f8)"}, - {21, "R4_w", "var_off=(0x0; 0x3fc)"}, - {22, "R4_w", "var_off=(0x0; 0x1fe)"}, + {6, "R0", "pkt(off=8,r=8)"}, + {6, "R3", "var_off=(0x0; 0xff)"}, + {7, "R3", "var_off=(0x0; 0x1fe)"}, + {8, "R3", "var_off=(0x0; 0x3fc)"}, + {9, "R3", "var_off=(0x0; 0x7f8)"}, + {10, "R3", "var_off=(0x0; 0xff0)"}, + {12, "R3", "pkt_end()"}, + {17, "R4", "var_off=(0x0; 0xff)"}, + {18, "R4", "var_off=(0x0; 0x1fe0)"}, + {19, "R4", "var_off=(0x0; 0xff0)"}, + {20, "R4", "var_off=(0x0; 0x7f8)"}, + {21, "R4", "var_off=(0x0; 0x3fc)"}, + {22, "R4", "var_off=(0x0; 0x1fe)"}, }, }, { @@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R4_w", "var_off=(0x0; 0xff)"}, - {8, "R4_w", "var_off=(0x0; 0xff)"}, - {9, "R4_w", "var_off=(0x0; 0xff)"}, - {10, "R4_w", "var_off=(0x0; 0x1fe)"}, - {11, "R4_w", "var_off=(0x0; 0xff)"}, - {12, "R4_w", "var_off=(0x0; 0x3fc)"}, - {13, "R4_w", "var_off=(0x0; 0xff)"}, - {14, "R4_w", "var_off=(0x0; 0x7f8)"}, - {15, "R4_w", "var_off=(0x0; 0xff0)"}, + {6, "R3", "var_off=(0x0; 0xff)"}, + {7, "R4", "var_off=(0x0; 0xff)"}, + {8, "R4", "var_off=(0x0; 0xff)"}, + {9, "R4", "var_off=(0x0; 0xff)"}, + {10, "R4", "var_off=(0x0; 0x1fe)"}, + {11, "R4", "var_off=(0x0; 0xff)"}, + {12, "R4", "var_off=(0x0; 0x3fc)"}, + {13, "R4", "var_off=(0x0; 0xff)"}, + {14, "R4", "var_off=(0x0; 0x7f8)"}, + {15, "R4", "var_off=(0x0; 0xff0)"}, }, }, { @@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(r=0)"}, - {4, "R5_w", "pkt(off=14,r=0)"}, - {5, "R4_w", "pkt(off=14,r=0)"}, + {2, "R5", "pkt(r=0)"}, + {4, "R5", "pkt(off=14,r=0)"}, + {5, "R4", "pkt(off=14,r=0)"}, {9, "R2", "pkt(r=18)"}, {10, "R5", "pkt(off=14,r=18)"}, - {10, "R4_w", "var_off=(0x0; 0xff)"}, - {13, "R4_w", "var_off=(0x0; 0xffff)"}, - {14, "R4_w", "var_off=(0x0; 0xffff)"}, + {10, "R4", "var_off=(0x0; 0xff)"}, + {13, "R4", "var_off=(0x0; 0xffff)"}, + {14, "R4", "var_off=(0x0; 0xffff)"}, }, }, { @@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {7, "R6", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5_w", "pkt(id=1,off=14,"}, + {11, "R5", "pkt(id=1,off=14,"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = { * instruction to validate R5 state. We also check * that R4 is what it should be in such case. */ - {18, "R4_w", "var_off=(0x0; 0x3fc)"}, - {18, "R5_w", "var_off=(0x0; 0x3fc)"}, + {18, "R4", "var_off=(0x0; 0x3fc)"}, + {18, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w", "pkt(id=2,off=14,"}, + {19, "R5", "pkt(id=2,off=14,"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8)"}, + {26, "R5", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. */ - {28, "R4_w", "var_off=(0x0; 0x3fc)"}, - {28, "R5_w", "var_off=(0x0; 0x3fc)"}, + {28, "R4", "var_off=(0x0; 0x3fc)"}, + {28, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w", "pkt(id=3,off=18,"}, + {29, "R5", "pkt(id=3,off=18,"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {31, "R4_w", "var_off=(0x0; 0x7fc)"}, - {31, "R5_w", "var_off=(0x0; 0x7fc)"}, + {31, "R4", "var_off=(0x0; 0x7fc)"}, + {31, "R5", "var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {7, "R6", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w", "var_off=(0x2; 0x7fc)"}, + {8, "R6", "var_off=(0x2; 0x7fc)"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w", "var_off=(0x2; 0x7fc)"}, + {11, "R5", "var_off=(0x2; 0x7fc)"}, {12, "R4", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w", "var_off=(0x0; 0x3fc)"}, + {17, "R6", "var_off=(0x0; 0x3fc)"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w", "var_off=(0x2; 0xffc)"}, + {19, "R5", "var_off=(0x2; 0xffc)"}, {20, "R4", "var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end()"}, + {3, "R5", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {8, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {8, "R6", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w", "var_off=(0x2; 0x7fc)"}, + {9, "R6", "var_off=(0x2; 0x7fc)"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w", "var_off=(0x0; 0x3fc)"}, + {10, "R7", "var_off=(0x0; 0x3fc)"}, /* Subtracting it from R6 blows our unsigned bounds */ {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ @@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {9, "R6_w", "var_off=(0x0; 0x3c)"}, + {6, "R2", "pkt(r=8)"}, + {9, "R6", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w", "var_off=(0x2; 0x7c)"}, + {10, "R6", "var_off=(0x2; 0x7c)"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w", "var_off=(0x0; 0x7fc)"}, + {14, "R7", "var_off=(0x0; 0x7fc)"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w", "var_off=(0x2; 0x7fc)"}, + {16, "R5", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c index 0223fce4db2b..693fd86fbde6 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -40,8 +40,13 @@ static void *spin_lock_thread(void *arg) err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run err"); + + if (topts.retval == -EOPNOTSUPP) + goto end; + ASSERT_EQ((int)topts.retval, 0, "test_run retval"); +end: pthread_exit(arg); } @@ -63,6 +68,7 @@ static void test_arena_spin_lock_size(int size) skel = arena_spin_lock__open_and_load(); if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load")) return; + if (skel->data->test_skip == 2) { test__skip(); goto end; @@ -86,6 +92,13 @@ static void test_arena_spin_lock_size(int size) goto end_barrier; } + if (skel->data->test_skip == 3) { + printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n", + __func__); + test__skip(); + goto end_barrier; + } + ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value"); end_barrier: diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 13e101f370a1..92b5f378bfb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel) void test_atomics(void) { struct atomics_lskel *skel; + int err; - skel = atomics_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "atomics skeleton load")) + skel = atomics_lskel__open(); + if (!ASSERT_OK_PTR(skel, "atomics skeleton open")) return; + skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = atomics_lskel__load(skel); + if (!ASSERT_OK(err, "atomics skeleton load")) + goto cleanup; + if (skel->data->skip_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)", __func__); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index cabc51c2ca6b..9e77e5da7097 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -3,6 +3,7 @@ #include "test_attach_kprobe_sleepable.skel.h" #include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" +#include "kprobe_write_ctx.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); @@ -201,6 +202,31 @@ cleanup: test_attach_probe_manual__destroy(skel); } +#ifdef __x86_64__ +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} +#else +static void test_attach_kprobe_write_ctx(void) +{ + test__skip(); +} +#endif + static void test_attach_probe_auto(struct test_attach_probe *skel) { struct bpf_link *uprobe_err_link; @@ -406,6 +432,8 @@ void test_attach_probe(void) test_attach_uprobe_long_event_name(); if (test__start_subtest("kprobe-long_name")) test_attach_kprobe_long_event_name(); + if (test__start_subtest("kprobe-write-ctx")) + test_attach_kprobe_write_ctx(); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 4a0670c056ba..75f4dff7d042 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.size = sizeof(attr); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; - attr.freq = 1; - attr.sample_freq = 10000; + attr.sample_period = 100000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 82903585c870..10cba526d3e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) /* tests with t->known_ptr_sz have no "long" or "unsigned long" type, * so it's impossible to determine correct pointer size; but if they - * do, it should be 8 regardless of host architecture, becaues BPF + * do, it should be 8 regardless of host architecture, because BPF * target is always 64-bit */ if (!t->known_ptr_sz) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c index e0dd966e4a3e..5ad904e9d15d 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -44,7 +44,7 @@ static void test_read_cgroup_xattr(void) if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) goto out; - skel->bss->target_pid = gettid(); + skel->bss->target_pid = sys_gettid(); if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index adda85f97058..4b42fbc96efc 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -4,6 +4,8 @@ #define _GNU_SOURCE #include <cgroup_helpers.h> #include <test_progs.h> +#include <sched.h> +#include <sys/wait.h> #include "cgrp_kfunc_failure.skel.h" #include "cgrp_kfunc_success.skel.h" @@ -87,6 +89,72 @@ static const char * const success_tests[] = { "test_cgrp_from_id", }; +static void test_cgrp_from_id_ns(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + int pid, pipe_fd[2]; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = skel->progs.test_cgrp_from_id_ns; + + if (!ASSERT_OK(pipe(pipe_fd), "pipe")) + goto cleanup; + + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork result")) { + close(pipe_fd[0]); + close(pipe_fd[1]); + goto cleanup; + } + + if (pid == 0) { + int ret = 0; + + close(pipe_fd[0]); + + if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup")) + exit(1); + + if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns")) + exit(1); + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "test run ret")) + exit(1); + + if (!ASSERT_OK(opts.retval, "test run retval")) + exit(1); + + if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe")) + exit(1); + + exit(0); + } else { + int res; + + close(pipe_fd[1]); + + ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res"); + ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child"); + + remove_cgroup_pid("cgrp_from_id_ns", pid); + + ASSERT_OK(res, "result from run"); + } + + close(pipe_fd[0]); +cleanup: + cgrp_kfunc_success__destroy(skel); +} + void test_cgrp_kfunc(void) { int i, err; @@ -102,6 +170,9 @@ void test_cgrp_kfunc(void) run_success_test(success_tests[i]); } + if (test__start_subtest("test_cgrp_from_id_ns")) + test_cgrp_from_id_ns(); + RUN_TESTS(cgrp_kfunc_failure); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 9b2d9ceda210..b9f86cb91e81 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -32,6 +32,8 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 241b2c8c6e0f..c534b4d5f9da 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id) * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 * 3) Close the btf_fd, now refcnt=1 - * Wait and check that BTF stil exists. + * Wait and check that BTF still exists. */ static void check_fd_array_cnt__referenced_btfs(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 130f5b82d2e6..5ef1804e44df 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -12,13 +12,24 @@ void test_fentry_fexit(void) int err, prog_fd, i; LIBBPF_OPTS(bpf_test_run_opts, topts); - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test_lskel__open(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; - fexit_skel = fexit_test_lskel__open_and_load(); + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) + goto close_prog; + + fexit_skel = fexit_test_lskel__open(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) + goto close_prog; + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index aee1bc77a17f..ec882328eb59 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -43,8 +43,13 @@ static void fentry_test(void) struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) + fentry_skel = fentry_test_lskel__open(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open")) + goto cleanup; + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) goto cleanup; err = fentry_test_common(fentry_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 1c13007e37dd..94eed753560c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -43,8 +43,13 @@ static void fexit_test(void) struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) + fexit_skel = fexit_test_lskel__open(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open")) + goto cleanup; + + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) goto cleanup; err = fexit_test_common(fexit_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c index b7b77a6b2979..0de8facca4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/free_timer.c +++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c @@ -124,6 +124,10 @@ void test_free_timer(void) int err; skel = free_timer__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "open_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c index a133354ac9bc..97b00c7efe94 100644 --- a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c +++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c @@ -16,7 +16,7 @@ void test_kernel_flag(void) if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel")) return; - lsm_skel->bss->monitored_tid = gettid(); + lsm_skel->bss->monitored_tid = sys_gettid(); ret = test_kernel_flag__attach(lsm_skel); if (!ASSERT_OK(ret, "test_kernel_flag__attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e19ef509ebf8..6cfaa978bc9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -7,6 +7,7 @@ #include "kprobe_multi_session.skel.h" #include "kprobe_multi_session_cookie.skel.h" #include "kprobe_multi_verifier.skel.h" +#include "kprobe_write_ctx.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -422,220 +423,6 @@ static void test_unique_match(void) kprobe_multi__destroy(skel); } -static size_t symbol_hash(long key, void *ctx __maybe_unused) -{ - return str_hash((const char *) key); -} - -static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return strcmp((const char *) key1, (const char *) key2) == 0; -} - -static bool is_invalid_entry(char *buf, bool kernel) -{ - if (kernel && strchr(buf, '[')) - return true; - if (!kernel && !strchr(buf, '[')) - return true; - return false; -} - -static bool skip_entry(char *name) -{ - /* - * We attach to almost all kernel functions and some of them - * will cause 'suspicious RCU usage' when fprobe is attached - * to them. Filter out the current culprits - arch_cpu_idle - * default_idle and rcu_* functions. - */ - if (!strcmp(name, "arch_cpu_idle")) - return true; - if (!strcmp(name, "default_idle")) - return true; - if (!strncmp(name, "rcu_", 4)) - return true; - if (!strcmp(name, "bpf_dispatcher_xdp_func")) - return true; - if (!strncmp(name, "__ftrace_invalid_address__", - sizeof("__ftrace_invalid_address__") - 1)) - return true; - return false; -} - -/* Do comparision by ignoring '.llvm.<hash>' suffixes. */ -static int compare_name(const char *name1, const char *name2) -{ - const char *res1, *res2; - int len1, len2; - - res1 = strstr(name1, ".llvm."); - res2 = strstr(name2, ".llvm."); - len1 = res1 ? res1 - name1 : strlen(name1); - len2 = res2 ? res2 - name2 : strlen(name2); - - if (len1 == len2) - return strncmp(name1, name2, len1); - if (len1 < len2) - return strncmp(name1, name2, len1) <= 0 ? -1 : 1; - return strncmp(name1, name2, len2) >= 0 ? 1 : -1; -} - -static int load_kallsyms_compare(const void *p1, const void *p2) -{ - return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); -} - -static int search_kallsyms_compare(const void *p1, const struct ksym *p2) -{ - return compare_name(p1, p2->name); -} - -static int get_syms(char ***symsp, size_t *cntp, bool kernel) -{ - size_t cap = 0, cnt = 0; - char *name = NULL, *ksym_name, **syms = NULL; - struct hashmap *map; - struct ksyms *ksyms; - struct ksym *ks; - char buf[256]; - FILE *f; - int err = 0; - - ksyms = load_kallsyms_custom_local(load_kallsyms_compare); - if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local")) - return -EINVAL; - - /* - * The available_filter_functions contains many duplicates, - * but other than that all symbols are usable in kprobe multi - * interface. - * Filtering out duplicates by using hashmap__add, which won't - * add existing entry. - */ - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - - if (!f) - return -EINVAL; - - map = hashmap__new(symbol_hash, symbol_equal, NULL); - if (IS_ERR(map)) { - err = libbpf_get_error(map); - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) - continue; - if (skip_entry(name)) - continue; - - ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); - if (!ks) { - err = -EINVAL; - goto error; - } - - ksym_name = ks->name; - err = hashmap__add(map, ksym_name, 0); - if (err == -EEXIST) { - err = 0; - continue; - } - if (err) - goto error; - - err = libbpf_ensure_mem((void **) &syms, &cap, - sizeof(*syms), cnt + 1); - if (err) - goto error; - - syms[cnt++] = ksym_name; - } - - *symsp = syms; - *cntp = cnt; - -error: - free(name); - fclose(f); - hashmap__free(map); - if (err) - free(syms); - return err; -} - -static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) -{ - unsigned long *addr, *addrs, *tmp_addrs; - int err = 0, max_cnt, inc_cnt; - char *name = NULL; - size_t cnt = 0; - char buf[256]; - FILE *f; - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); - - if (!f) - return -ENOENT; - - /* In my local setup, the number of entries is 50k+ so Let us initially - * allocate space to hold 64k entries. If 64k is not enough, incrementally - * increase 1k each time. - */ - max_cnt = 65536; - inc_cnt = 1024; - addrs = malloc(max_cnt * sizeof(long)); - if (addrs == NULL) { - err = -ENOMEM; - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) - continue; - if (skip_entry(name)) - continue; - - if (cnt == max_cnt) { - max_cnt += inc_cnt; - tmp_addrs = realloc(addrs, max_cnt); - if (!tmp_addrs) { - err = -ENOMEM; - goto error; - } - addrs = tmp_addrs; - } - - addrs[cnt++] = (unsigned long)addr; - } - - *addrsp = addrs; - *cntp = cnt; - -error: - free(name); - fclose(f); - if (err) - free(addrs); - return err; -} - static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts) { long attach_start_ns, attach_end_ns; @@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel) char **syms = NULL; size_t cnt = 0; - if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) + if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); @@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel) size_t cnt = 0; int err; - err = get_addrs(&addrs, &cnt, kernel); + err = bpf_get_addrs(&addrs, &cnt, kernel); if (err == -ENOENT) { test__skip(); return; } - if (!ASSERT_OK(err, "get_addrs")) + if (!ASSERT_OK(err, "bpf_get_addrs")) return; skel = kprobe_multi_empty__open_and_load(); @@ -753,6 +540,30 @@ cleanup: kprobe_multi_override__destroy(skel); } +#ifdef __x86_64__ +static void test_attach_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} +#else +static void test_attach_write_ctx(void) +{ + test__skip(); +} +#endif + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -792,5 +603,7 @@ void test_kprobe_multi_test(void) test_session_cookie_skel_api(); if (test__start_subtest("unique_match")) test_unique_match(); + if (test__start_subtest("attach_write_ctx")) + test_attach_write_ctx(); RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c new file mode 100644 index 000000000000..6bdc6d6de0da --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Google LLC. */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#include "map_excl.skel.h" + +static void test_map_excl_allowed(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__set_exclusive_program")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, true); + bpf_program__set_autoload(skel->progs.should_not_have_access, false); + + err = map_excl__load(skel); + ASSERT_OK(err, "map_excl__load"); +out: + map_excl__destroy(skel); +} + +static void test_map_excl_denied(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__make_exclusive")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, false); + bpf_program__set_autoload(skel->progs.should_not_have_access, true); + + err = map_excl__load(skel); + ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n"); +out: + map_excl__destroy(skel); + +} + +void test_map_excl(void) +{ + if (test__start_subtest("map_excl_allowed")) + test_map_excl_allowed(); + if (test__start_subtest("map_excl_denied")) + test_map_excl_denied(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d391d95f96e..70fa7ae93173 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -90,7 +90,7 @@ void test_module_attach(void) test_module_attach__detach(skel); - /* attach fentry/fexit and make sure it get's module reference */ + /* attach fentry/fexit and make sure it gets module reference */ link = bpf_program__attach(skel->progs.handle_fentry); if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c new file mode 100644 index 000000000000..9ae49b587f3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <test_progs.h> + + +#include "test_pinning_devmap.skel.h" + +void test_pinning_devmap_reuse(void) +{ + const char *pinpath1 = "/sys/fs/bpf/pinmap1"; + const char *pinpath2 = "/sys/fs/bpf/pinmap2"; + struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* load the object a first time */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "skel_load1")) + goto out; + + /* load the object a second time, re-using the pinned map */ + skel2 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "skel_load2")) + goto out; + + /* we can close the reference safely without + * the map's refcount falling to 0 + */ + test_pinning_devmap__destroy(skel1); + skel1 = NULL; + + /* now, swap the pins */ + err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE); + if (!ASSERT_OK(err, "swap pins")) + goto out; + + /* load the object again, this time the re-use should fail */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_ERR_PTR(skel1, "skel_load3")) + goto out; + +out: + unlink(pinpath1); + unlink(pinpath2); + test_pinning_devmap__destroy(skel1); + test_pinning_devmap__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c index 14f2796076e0..7607cfc2408c 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c @@ -54,3 +54,128 @@ void test_prog_tests_framework(void) return; clear_test_state(state); } + +static void dummy_emit(const char *buf, bool force) {} + +void test_prog_tests_framework_expected_msgs(void) +{ + struct expected_msgs msgs; + int i, j, error_cnt; + const struct { + const char *name; + const char *log; + const char *expected; + struct expect_msg *pats; + } cases[] = { + { + .name = "simple-ok", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "simple-fail", + .log = "aaabbbddd", + .expected = "MATCHED SUBSTR: 'aaa'\n" + "EXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-mid", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + { .substr = "bar", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-tail", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + {} + } + }, + { + .name = "negative-ok-head", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "foo", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-head", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'aaa'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa", .negative = true }, + { .substr = "bbb" }, + {} + } + }, + { + .name = "negative-fail-tail", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "bbb" }, + { .substr = "ccc", .negative = true }, + {} + } + }, + { + .name = "negative-fail-mid-1", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'bbb'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-mid-2", + .log = "aaabbb222ccc", + .expected = "UNEXPECTED SUBSTR: '222'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "222", .negative = true }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + } + }; + + for (i = 0; i < ARRAY_SIZE(cases); i++) { + if (test__start_subtest(cases[i].name)) { + error_cnt = env.subtest_state->error_cnt; + msgs.patterns = cases[i].pats; + msgs.cnt = 0; + for (j = 0; cases[i].pats[j].substr; j++) + msgs.cnt++; + validate_msgs(cases[i].log, &msgs, dummy_emit); + fflush(stderr); + env.subtest_state->error_cnt = error_cnt; + if (cases[i].expected) + ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output"); + else + ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output"); + test__end_subtest(); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index e261b0e872db..d93a0c7b1786 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -623,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a, x.b); *newy = range(t, y.a + 1, y.b); } else if (x.a == x.b && x.b == y.b) { - /* X is a constant matching rigth side of Y */ + /* X is a constant matching right side of Y */ *newx = range(t, x.a, x.b); *newy = range(t, y.a, y.b - 1); } else if (y.a == y.b && x.a == y.a) { @@ -631,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a + 1, x.b); *newy = range(t, y.a, y.b); } else if (y.a == y.b && x.b == y.b) { - /* Y is a constant matching rigth side of X */ + /* Y is a constant matching right side of X */ *newx = range(t, x.a, x.b - 1); *newy = range(t, y.a, y.b); } else { diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 0703e987df89..8c6c2043a432 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -99,3 +99,19 @@ end: res_spin_lock__destroy(skel); return; } + +void serial_test_res_spin_lock_stress(void) +{ + if (libbpf_num_possible_cpus() < 3) { + test__skip(); + return; + } + + ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); + + ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index e3ea5dc2f697..254fbfeab06a 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " - "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) " + "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" - " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" + " R0=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" " R0=map_value(id=2,ks=4,vs=8)" - " R1_w=map_value(id=2,ks=4,vs=8)\n" + " R1=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index b7ba5cd47d96..271b5cc9fc01 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -39,7 +39,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 0832fd787457..b277dddd5af7 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -66,7 +66,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index df59e4ae2951..c23b97414813 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -1,46 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "stacktrace_map.skel.h" void test_stacktrace_map(void) { + struct stacktrace_map *skel; int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "oncpu"; - int err, prog_fd, stack_trace_len; - const char *file = "./test_stacktrace_map.bpf.o"; - __u32 key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link; + int err, stack_trace_len; + __u32 key, val, stack_id, duration = 0; + __u64 stack[PERF_MAX_STACK_DEPTH]; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = stacktrace_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; - - link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); - if (!ASSERT_OK_PTR(link, "attach_tp")) - goto close_prog; - - /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK_FAIL(control_map_fd < 0)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK_FAIL(stackid_hmap_fd < 0)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK_FAIL(stackmap_fd < 0)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK_FAIL(stack_amap_fd < 0)) - goto disable_pmu; + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); + err = stacktrace_map__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; /* give some time for bpf program run */ sleep(1); @@ -50,26 +31,32 @@ void test_stacktrace_map(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu; - -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); + goto out; + + stack_id = skel->bss->stack_id; + err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_OK(err, "lookup and delete target stack_id")) + goto out; + + err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id")) + goto out; +out: + stacktrace_map__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index c6ef06f55cdb..e985d51d3d47 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void) { const char *prog_name = "oncpu"; int control_map_fd, stackid_hmap_fd, stackmap_fd; - const char *file = "./test_stacktrace_map.bpf.o"; + const char *file = "./stacktrace_map.bpf.o"; __u32 key, val, duration = 0; int err, prog_fd; struct bpf_program *prog; @@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c index 1932b1e0685c..dc2ccf6a14d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c @@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void) skel->bss->control = 1; /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap")) diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c index d9f0185dca61..c3cce5c292bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/stream.c +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -2,7 +2,6 @@ /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ #include <test_progs.h> #include <sys/mman.h> -#include <regex.h> #include "stream.skel.h" #include "stream_fail.skel.h" @@ -18,87 +17,6 @@ void test_stream_success(void) return; } -struct { - int prog_off; - const char *errstr; -} stream_error_arr[] = { - { - offsetof(struct stream, progs.stream_cond_break), - "ERROR: Timeout detected for may_goto instruction\n" - "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" - "Call trace:\n" - "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" - "|[ \t]+[^\n]+\n)*", - }, - { - offsetof(struct stream, progs.stream_deadlock), - "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" - "Attempted lock = (0x[0-9a-fA-F]+)\n" - "Total held locks = 1\n" - "Held lock\\[ 0\\] = \\1\n" // Lock address must match - "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" - "Call trace:\n" - "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" - "|[ \t]+[^\n]+\n)*", - }, -}; - -static int match_regex(const char *pattern, const char *string) -{ - int err, rc; - regex_t re; - - err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); - if (err) - return -1; - rc = regexec(&re, string, 0, NULL, 0); - regfree(&re); - return rc == 0 ? 1 : 0; -} - -void test_stream_errors(void) -{ - LIBBPF_OPTS(bpf_test_run_opts, opts); - LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); - struct stream *skel; - int ret, prog_fd; - char buf[1024]; - - skel = stream__open_and_load(); - if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) - return; - - for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { - struct bpf_program **prog; - - prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); - prog_fd = bpf_program__fd(*prog); - ret = bpf_prog_test_run_opts(prog_fd, &opts); - ASSERT_OK(ret, "ret"); - ASSERT_OK(opts.retval, "retval"); - -#if !defined(__x86_64__) - ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); - if (i == 0) { - ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); - ASSERT_EQ(ret, 0, "stream read"); - continue; - } -#endif - - ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); - ASSERT_GT(ret, 0, "stream read"); - ASSERT_LE(ret, 1023, "len for buf"); - buf[ret] = '\0'; - - ret = match_regex(stream_error_arr[i].errstr, buf); - if (!ASSERT_TRUE(ret == 1, "regex match")) - fprintf(stderr, "Output from stream:\n%s\n", buf); - } - - stream__destroy(skel); -} - void test_stream_syscall(void) { LIBBPF_OPTS(bpf_test_run_opts, opts); @@ -139,3 +57,52 @@ void test_stream_syscall(void) stream__destroy(skel); } + +static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + int ret, prog_fd; + char fault_addr[64]; + char buf[1024]; + + prog_fd = bpf_program__fd(prog); + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + sprintf(fault_addr, "0x%lx", *fault_addr_p); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) { + fprintf(stderr, "Output from stream:\n%s\n", buf); + fprintf(stderr, "Fault Addr: %s\n", fault_addr); + } +} + +void test_stream_arena_fault_address(void) +{ + struct stream *skel; + +#if !defined(__x86_64__) && !defined(__aarch64__) + printf("%s:SKIP: arena fault reporting not supported\n", __func__); + test__skip(); + return; +#endif + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + if (test__start_subtest("read_fault")) + test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr); + if (test__start_subtest("write_fault")) + test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c index 35af8044d059..4d66fad3c8bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -8,6 +8,7 @@ static const char * const test_cases[] = { "strcmp", + "strcasecmp", "strchr", "strchrnul", "strnchr", diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h new file mode 100644 index 000000000000..2de38776a2d4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_H +#define __TASK_LOCAL_DATA_H + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdatomic.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +#include <pthread.h> +#endif + +#include <bpf/bpf.h> + +/* + * OPTIONS + * + * Define the option before including the header + * + * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically + * + * Thread-specific memory for storing TLD is allocated lazily on the first call to + * tld_get_data(). The thread that calls it must also call tld_free() on thread exit + * to prevent memory leak. Pthread will be included if the option is defined. A pthread + * key will be registered with a destructor that calls tld_free(). + * + * + * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically + * (default: 64 bytes) + * + * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using + * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be + * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated + * for these TLDs. This additional memory is allocated for every thread that calls + * tld_get_data() even if no tld_create_key are actually called, so be mindful of + * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory + * will be allocated for TLDs created with it. + * + * + * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) + * + * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, + * TLD_MAX_DATA_CNT. + * + * + * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() + * + * When allocating the memory for storing TLDs, we need to make sure there is a memory + * region of the X bytes within a page. This is due to the limit posed by UPTR: memory + * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The + * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses + * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage + * as not all memory allocator can use the exact amount of memory requested to fulfill + * aligned_alloc(). For example, some may round the size up to the alignment. Enable the + * option to always use aligned_alloc() if the implementation has low memory overhead. + */ + +#define TLD_PAGE_SIZE getpagesize() +#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#ifndef TLD_DYN_DATA_SIZE +#define TLD_DYN_DATA_SIZE 64 +#endif + +#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + _Atomic __u16 size; +}; + +struct tld_meta_u { + _Atomic __u8 cnt; + __u16 size; + struct tld_metadata metadata[]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[]; +}; + +struct tld_map_value { + void *data; + struct tld_meta_u *meta; +}; + +struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); +__thread struct tld_data_u *tld_data_p __attribute__((weak)); +__thread void *tld_data_alloc_p __attribute__((weak)); + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +pthread_key_t tld_pthread_key __attribute__((weak)); + +static void tld_free(void); + +static void __tld_thread_exit_handler(void *unused) +{ + tld_free(); +} +#endif + +static int __tld_init_meta_p(void) +{ + struct tld_meta_u *meta, *uninit = NULL; + int err = 0; + + meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); + if (!meta) { + err = -ENOMEM; + goto out; + } + + memset(meta, 0, TLD_PAGE_SIZE); + meta->size = TLD_DYN_DATA_SIZE; + + if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { + free(meta); + goto out; + } + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); +#endif +out: + return err; +} + +static int __tld_init_data_p(int map_fd) +{ + bool use_aligned_alloc = false; + struct tld_map_value map_val; + struct tld_data_u *data; + void *data_alloc = NULL; + int err, tid_fd = -1; + + tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); + if (tid_fd < 0) { + err = -errno; + goto out; + } + +#ifdef TLD_DATA_USE_ALIGNED_ALLOC + use_aligned_alloc = true; +#endif + + /* + * tld_meta_p->size = TLD_DYN_DATA_SIZE + + * total size of TLDs defined via TLD_DEFINE_KEY() + */ + data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? + aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : + malloc(tld_meta_p->size * 2); + if (!data_alloc) { + err = -ENOMEM; + goto out; + } + + /* + * Always pass a page-aligned address to UPTR since the size of tld_map_value::data + * is a page in BTF. If data_alloc spans across two pages, use the page that contains large + * enough memory. + */ + if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); + data = data_alloc; + data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + + offsetof(struct tld_data_u, data); + } else { + map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data->start = offsetof(struct tld_data_u, data); + } + map_val.meta = TLD_READ_ONCE(tld_meta_p); + + err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); + if (err) { + free(data_alloc); + goto out; + } + + tld_data_p = data; + tld_data_alloc_p = data_alloc; +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_setspecific(tld_pthread_key, (void *)1); +#endif +out: + if (tid_fd >= 0) + close(tid_fd); + return err; +} + +static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) +{ + int err, i, sz, off = 0; + __u8 cnt; + + if (!TLD_READ_ONCE(tld_meta_p)) { + err = __tld_init_meta_p(); + if (err) + return (tld_key_t){err}; + } + + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { +retry: + cnt = atomic_load(&tld_meta_p->cnt); + if (i < cnt) { + /* A metadata is not ready until size is updated with a non-zero value */ + while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) + sched_yield(); + + if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) + return (tld_key_t){-EEXIST}; + + off += TLD_ROUND_UP(sz, 8); + continue; + } + + /* + * TLD_DEFINE_KEY() is given memory upto a page while at most + * TLD_DYN_DATA_SIZE is allocated for tld_create_key() + */ + if (dyn_data) { + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) + return (tld_key_t){-E2BIG}; + } else { + if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) + return (tld_key_t){-E2BIG}; + tld_meta_p->size += TLD_ROUND_UP(size, 8); + } + + /* + * Only one tld_create_key() can increase the current cnt by one and + * takes the latest available slot. Other threads will check again if a new + * TLD can still be added, and then compete for the new slot after the + * succeeding thread update the size. + */ + if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) + goto retry; + + strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); + atomic_store(&tld_meta_p->metadata[i].size, size); + return (tld_key_t){(__s16)off}; + } + + return (tld_key_t){-ENOSPC}; +} + +/** + * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. + * + * @name: The name of the TLD + * @size: The size of the TLD + * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN + * + * The macro can only be used in file scope. + * + * A global variable key of opaque type, tld_key_t, will be declared and initialized before + * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key + * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. + * bpf programs can also fetch the same key by name. + * + * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just + * enough memory will be allocated for each thread on the first call to tld_get_data(). + */ +#define TLD_DEFINE_KEY(key, name, size) \ +tld_key_t key; \ + \ +__attribute__((constructor)) \ +void __tld_define_key_##key(void) \ +{ \ + key = __tld_create_key(name, size, false); \ +} + +/** + * tld_create_key() - Create a TLD and return a key associated with the TLD. + * + * @name: The name the TLD + * @size: The size of the TLD + * + * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check + * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to + * locate the TLD. bpf programs can also fetch the same key by name. + * + * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is + * not known statically or a TLD needs to be created conditionally) + * + * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs + * created dynamically with tld_create_key(). Since only a user page is pinned to the + * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - + * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. + */ +__attribute__((unused)) +static tld_key_t tld_create_key(const char *name, size_t size) +{ + return __tld_create_key(name, size, true); +} + +__attribute__((unused)) +static inline bool tld_key_is_err(tld_key_t key) +{ + return key.off < 0; +} + +__attribute__((unused)) +static inline int tld_key_err_or_zero(tld_key_t key) +{ + return tld_key_is_err(key) ? key.off : 0; +} + +/** + * tld_get_data() - Get a pointer to the TLD associated with the given key of the + * calling thread. + * + * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map + * of task local data. + * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). + * + * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD + * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte + * aligned. + * + * Threads that call tld_get_data() must call tld_free() on exit to prevent + * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. + */ +__attribute__((unused)) +static void *tld_get_data(int map_fd, tld_key_t key) +{ + if (!TLD_READ_ONCE(tld_meta_p)) + return NULL; + + /* tld_data_p is allocated on the first invocation of tld_get_data() */ + if (!tld_data_p && __tld_init_data_p(map_fd)) + return NULL; + + return tld_data_p->data + key.off; +} + +/** + * tld_free() - Free task local data memory of the calling thread + * + * For the calling thread, all pointers to TLDs acquired before will become invalid. + * + * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, + * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered + * to free the memory automatically. + */ +__attribute__((unused)) +static void tld_free(void) +{ + if (tld_data_alloc_p) { + free(tld_data_alloc_p); + tld_data_alloc_p = NULL; + tld_data_p = NULL; + } +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __TASK_LOCAL_DATA_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c new file mode 100644 index 000000000000..450d17d91a56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work_stress.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> +#include <stdlib.h> +#include <stdatomic.h> + +struct test_data { + int prog_fd; + atomic_int exit; +}; + +void *runner(void *test_data) +{ + struct test_data *td = test_data; + int err = 0; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + while (!err && !atomic_load(&td->exit)) + err = bpf_prog_test_run_opts(td->prog_fd, &opts); + + return NULL; +} + +static int get_env_int(const char *str, int def) +{ + const char *s = getenv(str); + char *end; + int retval; + + if (!s || !*s) + return def; + errno = 0; + retval = strtol(s, &end, 10); + if (errno || *end || retval < 0) + return def; + return retval; +} + +static void task_work_run(bool enable_delete) +{ + struct task_work_stress *skel; + struct bpf_program *scheduler, *deleter; + int nthreads = 16; + int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1); + pthread_t tid[nthreads], tid_del; + bool started[nthreads], started_del = false; + struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 }; + int i, err; + + skel = task_work_stress__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work"); + bpf_program__set_autoload(scheduler, true); + + deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work"); + bpf_program__set_autoload(deleter, true); + + err = task_work_stress__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + for (i = 0; i < nthreads; ++i) + started[i] = false; + + td_sched.prog_fd = bpf_program__fd(scheduler); + for (i = 0; i < nthreads; ++i) { + if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started[i] = true; + } + + if (enable_delete) + atomic_store(&td_del.exit, 0); + + td_del.prog_fd = bpf_program__fd(deleter); + if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started_del = true; + + /* Run stress test for some time */ + sleep(test_time_s); + +cancel: + atomic_store(&td_sched.exit, 1); + atomic_store(&td_del.exit, 1); + for (i = 0; i < nthreads; ++i) { + if (started[i]) + pthread_join(tid[i], NULL); + } + + if (started_del) + pthread_join(tid_del, NULL); + + ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled"); + /* Some scheduling attempts should have failed due to contention */ + ASSERT_GT(skel->bss->schedule_error, 0, "schedule error"); + + if (enable_delete) { + /* If delete thread is enabled, it has cancelled some callbacks */ + ASSERT_GT(skel->bss->delete_success, 0, "delete success"); + ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } else { + /* Without delete thread number of scheduled callbacks is the same as fired */ + ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } + +cleanup: + task_work_stress__destroy(skel); +} + +void test_task_work_stress(void) +{ + if (test__start_subtest("no_delete")) + task_work_run(false); + if (test__start_subtest("with_delete")) + task_work_run(true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c new file mode 100644 index 000000000000..fd8762ba4b67 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_id_ops_mapping1.skel.h" +#include "struct_ops_id_ops_mapping2.skel.h" + +static void test_st_ops_id_ops_mapping(void) +{ + struct struct_ops_id_ops_mapping1 *skel1 = NULL; + struct struct_ops_id_ops_mapping2 *skel2 = NULL; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err, pid, prog1_fd, prog2_fd; + + skel1 = struct_ops_id_ops_mapping1__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open")) + goto out; + + skel2 = struct_ops_id_ops_mapping2__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open")) + goto out; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel1->bss->st_ops_id = info.id; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel2->bss->st_ops_id = info.id; + + err = struct_ops_id_ops_mapping1__attach(skel1); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach")) + goto out; + + err = struct_ops_id_ops_mapping2__attach(skel2); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach")) + goto out; + + /* run tracing prog that calls .test_1 and checks return */ + pid = getpid(); + skel1->bss->test_pid = pid; + skel2->bss->test_pid = pid; + sys_gettid(); + skel1->bss->test_pid = 0; + skel2->bss->test_pid = 0; + + /* run syscall_prog that calls .test_1 and checks return */ + prog1_fd = bpf_program__fd(skel1->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog1_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + prog2_fd = bpf_program__fd(skel2->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog2_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err"); + ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err"); + +out: + struct_ops_id_ops_mapping1__destroy(skel1); + struct_ops_id_ops_mapping2__destroy(skel2); +} + +void test_struct_ops_id_ops_mapping(void) +{ + if (test__start_subtest("st_ops_id_ops_mapping")) + test_st_ops_id_ops_mapping(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c new file mode 100644 index 000000000000..9fd6306b455c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <pthread.h> +#include <bpf/btf.h> +#include <test_progs.h> + +#define TLD_FREE_DATA_ON_THREAD_EXIT +#define TLD_DYN_DATA_SIZE 4096 +#include "task_local_data.h" + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +#include "test_task_local_data.skel.h" + +TLD_DEFINE_KEY(value0_key, "value0", sizeof(int)); + +/* + * Reset task local data between subtests by clearing metadata other + * than the statically defined value0. This is safe as subtests run + * sequentially. Users of task local data library should not touch + * library internal. + */ +static void reset_tld(void) +{ + if (TLD_READ_ONCE(tld_meta_p)) { + /* Remove TLDs created by tld_create_key() */ + tld_meta_p->cnt = 1; + tld_meta_p->size = TLD_DYN_DATA_SIZE; + memset(&tld_meta_p->metadata[1], 0, + (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata)); + } +} + +/* Serialize access to bpf program's global variables */ +static pthread_mutex_t global_mutex; + +static tld_key_t *tld_keys; + +#define TEST_BASIC_THREAD_NUM 32 + +void *test_task_local_data_basic_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel = (struct test_task_local_data *)arg; + int fd, err, tid, *value0, *value1; + struct test_tld_struct *value2; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + value0 = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(value0, "tld_get_data")) + goto out; + + value1 = tld_get_data(fd, tld_keys[1]); + if (!ASSERT_OK_PTR(value1, "tld_get_data")) + goto out; + + value2 = tld_get_data(fd, tld_keys[2]); + if (!ASSERT_OK_PTR(value2, "tld_get_data")) + goto out; + + tid = sys_gettid(); + + *value0 = tid + 0; + *value1 = tid + 1; + value2->a = tid + 2; + value2->b = tid + 3; + value2->c = tid + 4; + value2->d = tid + 5; + + pthread_mutex_lock(&global_mutex); + /* Run task_main that read task local data and save to global variables */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + + /* Make sure valueX are indeed local to threads */ + ASSERT_EQ(*value0, tid + 0, "value0"); + ASSERT_EQ(*value1, tid + 1, "value1"); + ASSERT_EQ(value2->a, tid + 2, "value2.a"); + ASSERT_EQ(value2->b, tid + 3, "value2.b"); + ASSERT_EQ(value2->c, tid + 4, "value2.c"); + ASSERT_EQ(value2->d, tid + 5, "value2.d"); + + *value0 = tid + 5; + *value1 = tid + 4; + value2->a = tid + 3; + value2->b = tid + 2; + value2->c = tid + 1; + value2->d = tid + 0; + + /* Run task_main again */ + pthread_mutex_lock(&global_mutex); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + +out: + pthread_exit(NULL); +} + +static void test_task_local_data_basic(void) +{ + struct test_task_local_data *skel; + pthread_t thread[TEST_BASIC_THREAD_NUM]; + char dummy_key_name[TLD_NAME_LEN]; + tld_key_t key; + int i, err; + + reset_tld(); + + ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init"); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[1] = tld_create_key("value1", sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key"); + tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key"); + + /* + * Shouldn't be able to store data exceed a page. Create a TLD just big + * enough to exceed a page. TLDs already created are int value0, int + * value1, and struct test_tld_struct value2. + */ + key = tld_create_key("value_not_exist", + TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key"); + + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key"); + + /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */ + for (i = 3; i < TLD_MAX_DATA_CNT; i++) { + snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i); + tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key"); + } + key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key"); + + /* Access TLDs from multiple threads and check if they are thread-specific */ + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) { + err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel); + if (!ASSERT_OK(err, "pthread_create")) + goto out; + } + +out: + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) + pthread_join(thread[i], NULL); + + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3) + +void *test_task_local_data_race_thread(void *arg) +{ + int err = 0, id = (intptr_t)arg; + char key_name[32]; + tld_key_t key; + + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1); + if (tld_key_err_or_zero(key) != -E2BIG) { + err = 1; + goto out; + } + + /* Only one thread will succeed in creating value1 */ + key = tld_create_key("value1", sizeof(int)); + if (!tld_key_is_err(key)) + tld_keys[1] = key; + + /* Only one thread will succeed in creating value2 */ + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + if (!tld_key_is_err(key)) + tld_keys[2] = key; + + snprintf(key_name, 32, "thread_%d", id); + tld_keys[id] = tld_create_key(key_name, sizeof(int)); + if (tld_key_is_err(tld_keys[id])) + err = 2; +out: + return (void *)(intptr_t)err; +} + +static void test_task_local_data_race(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + pthread_t thread[TEST_RACE_THREAD_NUM]; + struct test_task_local_data *skel; + int fd, i, j, err, *data; + void *ret = NULL; + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[0] = value0_key; + + for (j = 0; j < 100; j++) { + reset_tld(); + + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + /* + * Try to make tld_create_key() race with each other. Call + * tld_create_key(), both valid and invalid, from different threads. + */ + err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread, + (void *)(intptr_t)(i + 3)); + if (CHECK_FAIL(err)) + break; + } + + /* Wait for all tld_create_key() to return */ + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + pthread_join(thread[i], &ret); + if (CHECK_FAIL(ret)) + break; + } + + /* Write a unique number to each TLD */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(!data)) + break; + *data = i; + } + + /* Read TLDs and check the value to see if any address collides with another */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(*data != i)) + break; + } + + /* Run task_main to make sure no invalid TLDs are added */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + } +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +void test_task_local_data(void) +{ + if (test__start_subtest("task_local_data_basic")) + test_task_local_data_basic(); + if (test__start_subtest("task_local_data_race")) + test_task_local_data_race(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c new file mode 100644 index 000000000000..774b31a5f6ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work.skel.h" +#include "task_work_fail.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> + +static int perf_event_open(__u32 type, __u64 config, int pid) +{ + struct perf_event_attr attr = { + .type = type, + .config = config, + .size = sizeof(struct perf_event_attr), + .sample_period = 100000, + }; + + return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0); +} + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +static int verify_map(struct bpf_map *map, const char *expected_data) +{ + int err; + struct elem value; + int processed_values = 0; + int k, sz; + + sz = bpf_map__max_entries(map); + for (k = 0; k < sz; ++k) { + err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0); + if (err) + continue; + if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) { + fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data, + value.data, bpf_map__name(map)); + return 2; + } + processed_values++; + } + + return processed_values == 0; +} + +static void task_work_run(const char *prog_name, const char *map_name) +{ + struct task_work *skel; + struct bpf_program *prog; + struct bpf_map *map; + struct bpf_link *link = NULL; + int err, pe_fd = -1, pid, status, pipefd[2]; + char user_string[] = "hello world"; + + if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe")) + return; + + pid = fork(); + if (pid == 0) { + __u64 num = 1; + int i; + char buf; + + close(pipefd[1]); + read(pipefd[0], &buf, sizeof(buf)); + close(pipefd[0]); + + for (i = 0; i < 10000; ++i) + num *= time(0) % 7; + (void)num; + exit(0); + } + if (!ASSERT_GT(pid, 0, "fork() failed")) { + close(pipefd[0]); + close(pipefd[1]); + return; + } + + skel = task_work__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, false); + } + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "prog_name")) + goto cleanup; + bpf_program__set_autoload(prog, true); + skel->bss->user_ptr = (char *)user_string; + + err = task_work__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid); + if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) { + fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid); + goto cleanup; + } + + link = bpf_program__attach_perf_event(prog, pe_fd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto cleanup; + + /* perf event fd ownership is passed to bpf_link */ + pe_fd = -1; + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + /* Wait to collect some samples */ + waitpid(pid, &status, 0); + pid = 0; + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "find map_name")) + goto cleanup; + if (!ASSERT_OK(verify_map(map, user_string), "verify map")) + goto cleanup; +cleanup: + if (pe_fd >= 0) + close(pe_fd); + bpf_link__destroy(link); + task_work__destroy(skel); + if (pid > 0) { + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + waitpid(pid, &status, 0); + } +} + +void test_task_work(void) +{ + if (test__start_subtest("test_task_work_hash_map")) + task_work_run("oncpu_hash_map", "hmap"); + + if (test__start_subtest("test_task_work_array_map")) + task_work_run("oncpu_array_map", "arrmap"); + + if (test__start_subtest("test_task_work_lru_map")) + task_work_run("oncpu_lru_map", "lrumap"); + + RUN_TESTS(task_work_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 367f47e4a936..b38c16b4247f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -75,26 +75,26 @@ static void test_set_global_vars_succeeds(void) " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); - __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); - __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); - __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000"); - __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210"); - __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); - __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); - __CHECK_STR("_w=128 ", "var_s8 = -128"); - __CHECK_STR("_w=255 ", "var_u8 = 255"); - __CHECK_STR("_w=11 ", "var_ea = EA2"); - __CHECK_STR("_w=12 ", "var_eb = EB2"); - __CHECK_STR("_w=13 ", "var_ec = EC2"); - __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); - __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); - __CHECK_STR("_w=171 ", "arr[3]= 171"); - __CHECK_STR("_w=172 ", "arr[EA2] =172"); - __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); - __CHECK_STR("_w=173 ", "matrix[31][7][11]=173"); - __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); - __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); + __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); + __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); + __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000"); + __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210"); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); + __CHECK_STR("=128 ", "var_s8 = -128"); + __CHECK_STR("=255 ", "var_u8 = 255"); + __CHECK_STR("=11 ", "var_ea = EA2"); + __CHECK_STR("=12 ", "var_eb = EB2"); + __CHECK_STR("=13 ", "var_ec = EC2"); + __CHECK_STR("=1 ", "var_b = 1"); + __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); + __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("=171 ", "arr[3]= 171"); + __CHECK_STR("=172 ", "arr[EA2] =172"); + __CHECK_STR("=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -117,8 +117,8 @@ static void test_set_global_vars_from_file_succeeds(void) SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s", fix->veristat, input_file, fix->tmpfile); read(fix->fd, fix->output, fix->sz); - __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); - __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); out: close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index d66687f1ee6a..34f9ccce2602 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include "timer.skel.h" #include "timer_failure.skel.h" +#include "timer_interrupt.skel.h" #define NUM_THR 8 @@ -86,6 +87,10 @@ void serial_test_timer(void) int err; timer_skel = timer__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) return; @@ -95,3 +100,36 @@ void serial_test_timer(void) RUN_TESTS(timer_failure); } + +void test_timer_interrupt(void) +{ + struct timer_interrupt *skel = NULL; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + skel = timer_interrupt__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } + if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load")) + return; + + err = timer_interrupt__attach(skel); + if (!ASSERT_OK(err, "timer_interrupt__attach")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt); + err = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + + usleep(50); + + ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt"); + if (skel->bss->preempt_count) + ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb"); + +out: + timer_interrupt__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c index f74b82305da8..b841597c8a3a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode) struct timer_crash *skel; skel = timer_crash__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) return; skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 1a2f99596916..eb303fa1e09a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -59,6 +59,10 @@ void test_timer_lockup(void) } skel = timer_lockup__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 9ff7843909e7..c930c7d7105b 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -65,6 +65,10 @@ void serial_test_timer_mim(void) goto cleanup; timer_skel = timer_mim__open_and_load(); + if (!timer_skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index 19e68d4b3532..6f8c0bfb0415 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -112,10 +112,39 @@ destroy_skel: tracing_struct_many_args__destroy(skel); } +static void test_union_args(void) +{ + struct tracing_struct *skel; + int err; + + skel = tracing_struct__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load")) + return; + + err = tracing_struct__attach(skel); + if (!ASSERT_OK(err, "tracing_struct__attach")) + goto out; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a"); + ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b"); + ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c"); + + ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a"); + ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a"); + ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b"); + +out: + tracing_struct__destroy(skel); +} + void test_tracing_struct(void) { if (test__start_subtest("struct_args")) test_struct_args(); if (test__start_subtest("struct_many_args")) test_struct_many_args(); + if (test__start_subtest("union_args")) + test_union_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c index cf3e0e7a64fa..86404476c1da 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -2,6 +2,7 @@ /* Copyright (c) 2023 Hengqi Chen */ #include <test_progs.h> +#include <asm/ptrace.h> #include "test_uprobe.skel.h" static FILE *urand_spawn(int *pid) @@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe) return exit_code; } -void test_uprobe(void) +static void test_uprobe_attach(void) { LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); struct test_uprobe *skel; @@ -93,3 +94,156 @@ cleanup: pclose(urand_pipe); test_uprobe__destroy(skel); } + +#ifdef __x86_64__ +__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void) +{ + asm volatile ( + "ret\n" + ); +} + +static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after) +{ + asm volatile ( + "movq %r11, 48(%rdi)\n" + "movq %r10, 56(%rdi)\n" + "movq %r9, 64(%rdi)\n" + "movq %r8, 72(%rdi)\n" + "movq %rax, 80(%rdi)\n" + "movq %rcx, 88(%rdi)\n" + "movq %rdx, 96(%rdi)\n" + "movq %rsi, 104(%rdi)\n" + "movq %rdi, 112(%rdi)\n" + + /* save 2nd argument */ + "pushq %rsi\n" + "call uprobe_regs_change_trigger\n" + + /* save return value and load 2nd argument pointer to rax */ + "pushq %rax\n" + "movq 8(%rsp), %rax\n" + + "movq %r11, 48(%rax)\n" + "movq %r10, 56(%rax)\n" + "movq %r9, 64(%rax)\n" + "movq %r8, 72(%rax)\n" + "movq %rcx, 88(%rax)\n" + "movq %rdx, 96(%rax)\n" + "movq %rsi, 104(%rax)\n" + "movq %rdi, 112(%rax)\n" + + /* restore return value and 2nd argument */ + "pop %rax\n" + "pop %rsi\n" + + "movq %rax, 80(%rsi)\n" + "ret\n" + ); +} + +static void regs_common(void) +{ + struct pt_regs before = {}, after = {}, expected = { + .rax = 0xc0ffe, + .rcx = 0xbad, + .rdx = 0xdead, + .r8 = 0x8, + .r9 = 0x9, + .r10 = 0x10, + .r11 = 0x11, + .rdi = 0x12, + .rsi = 0x13, + }; + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->regs = expected; + + uprobe_opts.func_name = "uprobe_regs_change_trigger"; + skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + uprobe_regs_change(&before, &after); + + ASSERT_EQ(after.rax, expected.rax, "ax"); + ASSERT_EQ(after.rcx, expected.rcx, "cx"); + ASSERT_EQ(after.rdx, expected.rdx, "dx"); + ASSERT_EQ(after.r8, expected.r8, "r8"); + ASSERT_EQ(after.r9, expected.r9, "r9"); + ASSERT_EQ(after.r10, expected.r10, "r10"); + ASSERT_EQ(after.r11, expected.r11, "r11"); + ASSERT_EQ(after.rdi, expected.rdi, "rdi"); + ASSERT_EQ(after.rsi, expected.rsi, "rsi"); + +cleanup: + test_uprobe__destroy(skel); +} + +static noinline unsigned long uprobe_regs_change_ip_1(void) +{ + return 0xc0ffee; +} + +static noinline unsigned long uprobe_regs_change_ip_2(void) +{ + return 0xdeadbeef; +} + +static void regs_ip(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + unsigned long ret; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2; + + uprobe_opts.func_name = "uprobe_regs_change_ip_1"; + skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts( + skel->progs.test_regs_change_ip, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + ret = uprobe_regs_change_ip_1(); + ASSERT_EQ(ret, 0xdeadbeef, "ret"); + +cleanup: + test_uprobe__destroy(skel); +} + +static void test_uprobe_regs_change(void) +{ + if (test__start_subtest("regs_change_common")) + regs_common(); + if (test__start_subtest("regs_change_ip")) + regs_ip(); +} +#else +static void test_uprobe_regs_change(void) { } +#endif + +void test_uprobe(void) +{ + if (test__start_subtest("attach")) + test_uprobe_attach(); + test_uprobe_regs_change(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index b17dc39a23db..6d75ede16e7c 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -8,22 +8,31 @@ #include <asm/ptrace.h> #include <linux/compiler.h> #include <linux/stringify.h> +#include <linux/kernel.h> #include <sys/wait.h> #include <sys/syscall.h> #include <sys/prctl.h> #include <asm/prctl.h> #include "uprobe_syscall.skel.h" #include "uprobe_syscall_executed.skel.h" +#include "bpf/libbpf_internal.h" -__naked unsigned long uretprobe_regs_trigger(void) +#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00 +#include "usdt.h" + +#pragma GCC diagnostic ignored "-Wattributes" + +__attribute__((aligned(16))) +__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void) { asm volatile ( + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */ "movq $0xdeadbeef, %rax\n" "ret\n" ); } -__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) +__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after) { asm volatile ( "movq %r15, 0(%rdi)\n" @@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) "movq $0, 120(%rdi)\n" /* orig_rax */ "movq $0, 128(%rdi)\n" /* rip */ "movq $0, 136(%rdi)\n" /* cs */ + "pushq %rax\n" "pushf\n" "pop %rax\n" "movq %rax, 144(%rdi)\n" /* eflags */ + "pop %rax\n" "movq %rsp, 152(%rdi)\n" /* rsp */ "movq $0, 160(%rdi)\n" /* ss */ /* save 2nd argument */ "pushq %rsi\n" - "call uretprobe_regs_trigger\n" + "call uprobe_regs_trigger\n" /* save return value and load 2nd argument pointer to rax */ "pushq %rax\n" @@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) ); } -static void test_uretprobe_regs_equal(void) +static void test_uprobe_regs_equal(bool retprobe) { + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = retprobe, + ); struct uprobe_syscall *skel = NULL; struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; unsigned long *pa = (unsigned long *) &after; unsigned long *pp; + unsigned long offset; unsigned int i, cnt; - int err; + + offset = get_uprobe_offset(&uprobe_regs_trigger); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return; skel = uprobe_syscall__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load")) goto cleanup; - err = uprobe_syscall__attach(skel); - if (!ASSERT_OK(err, "uprobe_syscall__attach")) + skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts")) goto cleanup; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + if (!retprobe) + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); pp = (unsigned long *) &skel->bss->regs; cnt = sizeof(before)/sizeof(*pb); @@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void) unsigned int offset = i * sizeof(unsigned long); /* - * Check register before and after uretprobe_regs_trigger call + * Check register before and after uprobe_regs_trigger call * that triggers the uretprobe. */ switch (offset) { @@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void) /* * Check register seen from bpf program and register after - * uretprobe_regs_trigger call + * uprobe_regs_trigger call (with rax exception, check below). */ switch (offset) { /* @@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void) case offsetof(struct pt_regs, rsp): case offsetof(struct pt_regs, ss): break; + /* + * uprobe does not see return value in rax, it needs to see the + * original (before) rax value + */ + case offsetof(struct pt_regs, rax): + if (!retprobe) { + ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check"); + break; + } default: if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check")) fprintf(stdout, "failed register offset %u\n", offset); @@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset) return ret != n ? (int) ret : 0; } -static void test_uretprobe_regs_change(void) +static void test_regs_change(void) { struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; @@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void) unsigned long cnt = sizeof(before)/sizeof(*pb); unsigned int i, err, offset; - offset = get_uprobe_offset(uretprobe_regs_trigger); + offset = get_uprobe_offset(uprobe_regs_trigger); err = write_bpf_testmod_uprobe(offset); if (!ASSERT_OK(err, "register_uprobe")) return; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); err = write_bpf_testmod_uprobe(0); if (!ASSERT_OK(err, "unregister_uprobe")) @@ -252,6 +287,7 @@ static void test_uretprobe_syscall_call(void) ); struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c = 0; + struct bpf_link *link; if (!ASSERT_OK(pipe(go), "pipe")) return; @@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void) _exit(0); } - skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid, - "/proc/self/exe", - "uretprobe_syscall_call", &opts); - if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi")) + skel->bss->pid = pid; + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + pid, "/proc/self/exe", + "uretprobe_syscall_call", &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) goto cleanup; + skel->links.test_uretprobe_multi = link; /* kick the child */ write(go[1], &c, 1); @@ -301,6 +340,256 @@ cleanup: close(go[0]); } +#define TRAMP "[uprobes-trampoline]" + +__attribute__((aligned(16))) +__nocf_check __weak __naked void uprobe_test(void) +{ + asm volatile (" \n" + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" + "ret \n" + ); +} + +__attribute__((aligned(16))) +__nocf_check __weak void usdt_test(void) +{ + USDT(optimized_uprobe, usdt); +} + +static int find_uprobes_trampoline(void *tramp_addr) +{ + void *start, *end; + char line[128]; + int ret = -1; + FILE *maps; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + fprintf(stderr, "cannot open maps\n"); + return -1; + } + + while (fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. */ + if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2) + continue; + if (m < 0) + continue; + if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) { + ret = 0; + break; + } + } + + fclose(maps); + return ret; +} + +static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; + +static void *find_nop5(void *fn) +{ + int i; + + for (i = 0; i < 10; i++) { + if (!memcmp(nop5, fn + i, 5)) + return fn + i; + } + return NULL; +} + +typedef void (__attribute__((nocf_check)) *trigger_t)(void); + +static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger, + void *addr, int executed) +{ + struct __arch_relative_insn { + __u8 op; + __s32 raddr; + } __packed *call; + void *tramp = NULL; + + /* Uprobe gets optimized after first trigger, so let's press twice. */ + trigger(); + trigger(); + + /* Make sure bpf program got executed.. */ + ASSERT_EQ(skel->bss->executed, executed, "executed"); + + /* .. and check the trampoline is as expected. */ + call = (struct __arch_relative_insn *) addr; + tramp = (void *) (call + 1) + call->raddr; + ASSERT_EQ(call->op, 0xe8, "call"); + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + + return tramp; +} + +static void check_detach(void *addr, void *tramp) +{ + /* [uprobes_trampoline] stays after detach */ + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + ASSERT_OK(memcmp(addr, nop5, 5), "nop5"); +} + +static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link, + trigger_t trigger, void *addr, int executed) +{ + void *tramp; + + tramp = check_attach(skel, trigger, addr, executed); + bpf_link__destroy(link); + check_detach(addr, tramp); +} + +static void test_uprobe_legacy(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + /* uprobe */ + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe */ + skel->bss->executed = 0; + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_multi(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + /* uprobe.multi */ + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe.multi */ + skel->bss->executed = 0; + opts.retprobe = true; + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_session(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .session = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 4); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_usdt(void) +{ + struct uprobe_syscall_executed *skel; + struct bpf_link *link; + void *addr; + + errno = 0; + addr = find_nop5(usdt_test); + if (!ASSERT_OK_PTR(addr, "find_nop5")) + return; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_usdt(skel->progs.test_usdt, + -1 /* all PIDs */, "/proc/self/exe", + "optimized_uprobe", "usdt", NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt")) + goto cleanup; + + check(skel, link, usdt_test, addr, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + /* * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c. * @@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void) return; } - /* Run all of the uretprobe tests. */ - test_uretprobe_regs_equal(); - test_uretprobe_regs_change(); + /* Run all the tests with shadow stack in place. */ + + test_uprobe_regs_equal(false); + test_uprobe_regs_equal(true); test_uretprobe_syscall_call(); + test_uprobe_legacy(); + test_uprobe_multi(); + test_uprobe_session(); + test_uprobe_usdt(); + + test_regs_change(); + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); } -#else -static void test_uretprobe_regs_equal(void) + +static volatile bool race_stop; + +static USDT_DEFINE_SEMA(race); + +static void *worker_trigger(void *arg) { - test__skip(); + unsigned long rounds = 0; + + while (!race_stop) { + uprobe_test(); + rounds++; + } + + printf("tid %d trigger rounds: %lu\n", gettid(), rounds); + return NULL; } -static void test_uretprobe_regs_change(void) +static void *worker_attach(void *arg) { - test__skip(); + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct uprobe_syscall_executed *skel; + unsigned long rounds = 0, offset; + const char *sema[2] = { + __stringify(USDT_SEMA(race)), + NULL, + }; + unsigned long *ref; + int err; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return NULL; + + err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema")) + return NULL; + + opts.ref_ctr_offset = *ref; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return NULL; + + skel->bss->pid = getpid(); + + while (!race_stop) { + skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts")) + break; + + bpf_link__destroy(skel->links.test_uprobe); + skel->links.test_uprobe = NULL; + rounds++; + } + + printf("tid %d attach rounds: %lu hits: %d\n", gettid(), rounds, skel->bss->executed); + uprobe_syscall_executed__destroy(skel); + free(ref); + return NULL; } -static void test_uretprobe_syscall_call(void) +static useconds_t race_msec(void) { - test__skip(); + char *env; + + env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC"); + if (env) + return atoi(env); + + /* default duration is 500ms */ + return 500; } -static void test_uretprobe_shadow_stack(void) +static void test_uprobe_race(void) { - test__skip(); + int err, i, nr_threads; + pthread_t *threads; + + nr_threads = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus")) + return; + nr_threads = max(2, nr_threads); + + threads = alloca(sizeof(*threads) * nr_threads); + if (!ASSERT_OK_PTR(threads, "malloc")) + return; + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach, + NULL); + if (!ASSERT_OK(err, "pthread_create")) + goto cleanup; + } + + usleep(race_msec() * 1000); + +cleanup: + race_stop = true; + for (nr_threads = i, i = 0; i < nr_threads; i++) + pthread_join(threads[i], NULL); + + ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore"); } + +#ifndef __NR_uprobe +#define __NR_uprobe 336 #endif -void test_uprobe_syscall(void) +static void test_uprobe_error(void) +{ + long err = syscall(__NR_uprobe); + + ASSERT_EQ(err, -1, "error"); + ASSERT_EQ(errno, ENXIO, "errno"); +} + +static void __test_uprobe_syscall(void) { if (test__start_subtest("uretprobe_regs_equal")) - test_uretprobe_regs_equal(); - if (test__start_subtest("uretprobe_regs_change")) - test_uretprobe_regs_change(); + test_uprobe_regs_equal(true); if (test__start_subtest("uretprobe_syscall_call")) test_uretprobe_syscall_call(); if (test__start_subtest("uretprobe_shadow_stack")) test_uretprobe_shadow_stack(); + if (test__start_subtest("uprobe_legacy")) + test_uprobe_legacy(); + if (test__start_subtest("uprobe_multi")) + test_uprobe_multi(); + if (test__start_subtest("uprobe_session")) + test_uprobe_session(); + if (test__start_subtest("uprobe_usdt")) + test_uprobe_usdt(); + if (test__start_subtest("uprobe_race")) + test_uprobe_race(); + if (test__start_subtest("uprobe_error")) + test_uprobe_error(); + if (test__start_subtest("uprobe_regs_equal")) + test_uprobe_regs_equal(false); + if (test__start_subtest("regs_change")) + test_regs_change(); +} +#else +static void __test_uprobe_syscall(void) +{ + test__skip(); +} +#endif + +void test_uprobe_syscall(void) +{ + __test_uprobe_syscall(); } diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..4f7f45e69315 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) { } } -static void subtest_basic_usdt(void) +#if defined(__x86_64__) || defined(__i386__) +/* + * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)" + * - 'size' is the size in bytes of the array element, and its sign indicates + * whether the type is signed (negative) or unsigned (positive). + * - 'base_reg' is the register holding the base address, normally rdx or edx + * - 'index_reg' is the register holding the index, normally rax or eax + * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the + * size of the element type. + * + * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be: + * - size: -2 (negative because 'short' is signed) + * - scale: 2 (since sizeof(short) == 2) + * + * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386 + */ +static volatile short array[] = {-1, -2, -3, -4}; + +#if defined(__x86_64__) +#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2) +#else +#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2) +#endif + +unsigned short test_usdt_sib_semaphore SEC(".probes"); + +static void trigger_sib_spec(void) +{ + /* + * Force SIB addressing with inline assembly. + * + * You must compile with -std=gnu99 or -std=c99 to use the + * STAP_PROBE_ASM macro. + * + * The STAP_PROBE_ASM macro generates a quoted string that gets + * inserted between the surrounding assembly instructions. In this + * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction + * stream, creating a probe point between the asm statement boundaries. + * It works fine with gcc/clang. + * + * Register constraints: + * - "d"(array): Binds the 'array' variable to %rdx or %edx register + * - "a"(0): Binds the constant 0 to %rax or %eax register + * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and + * %%rax(%eax), they contain the expected values for SIB addressing. + * + * The "memory" clobber prevents the compiler from reordering memory + * accesses around the probe point, ensuring that the probe behavior + * is predictable and consistent. + */ + asm volatile( + STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC) + : + : "d"(array), "a"(0) + : "memory" + ); +} +#endif + +static void subtest_basic_usdt(bool optimized) { LIBBPF_OPTS(bpf_usdt_opts, opts); struct test_usdt *skel; struct test_usdt__bss *bss; - int err, i; + int err, i, called; + const __u64 expected_cookie = 0xcafedeadbeeffeed; + +#define TRIGGER(x) ({ \ + trigger_func(x); \ + if (optimized) \ + trigger_func(x); \ + optimized ? 2 : 1; \ + }) skel = test_usdt__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -59,20 +126,29 @@ static void subtest_basic_usdt(void) goto cleanup; /* usdt0 won't be auto-attached */ - opts.usdt_cookie = 0xcafedeadbeeffeed; + opts.usdt_cookie = expected_cookie; skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, 0 /*self*/, "/proc/self/exe", "test", "usdt0", &opts); if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) goto cleanup; - trigger_func(1); +#if defined(__x86_64__) || defined(__i386__) + opts.usdt_cookie = expected_cookie; + skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib, + 0 /*self*/, "/proc/self/exe", + "test", "usdt_sib", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link")) + goto cleanup; +#endif + + alled = TRIGGER(1); - ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); - ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie"); ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); @@ -119,11 +195,11 @@ static void subtest_basic_usdt(void) * bpf_program__attach_usdt() handles this properly and attaches to * all possible places of USDT invocation. */ - trigger_func(2); + called += TRIGGER(2); - ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); /* only check values that depend on trigger_func()'s input value */ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1"); @@ -142,9 +218,9 @@ static void subtest_basic_usdt(void) if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach")) goto cleanup; - trigger_func(3); + called += TRIGGER(3); - ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); /* this time usdt3 has custom cookie */ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie"); ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); @@ -156,8 +232,19 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); +#if defined(__x86_64__) || defined(__i386__) + trigger_sib_spec(); + ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called"); + ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie"); + ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt"); + ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg"); + ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret"); + ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size"); +#endif + cleanup: test_usdt__destroy(skel); +#undef TRIGGER } unsigned short test_usdt_100_semaphore SEC(".probes"); @@ -425,7 +512,11 @@ cleanup: void test_usdt(void) { if (test__start_subtest("basic")) - subtest_basic_usdt(); + subtest_basic_usdt(false); +#ifdef __x86_64__ + if (test__start_subtest("basic_optimized")) + subtest_basic_usdt(true); +#endif if (test__start_subtest("multispec")) subtest_multispec_usdt(); if (test__start_subtest("urand_auto_attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 77ec95d4ffaa..28e81161e6fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -46,6 +46,7 @@ #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_linked_scalars.skel.h" +#include "verifier_live_stack.skel.h" #include "verifier_load_acquire.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" @@ -59,6 +60,7 @@ #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" #include "verifier_mtu.skel.h" +#include "verifier_mul.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_bpf_fastcall.skel.h" @@ -183,6 +185,7 @@ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); } +void test_verifier_live_stack(void) { RUN(verifier_live_stack); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); } @@ -194,6 +197,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); } void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } +void test_verifier_mul(void) { RUN(verifier_mul); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index b9d9f0a502ce..178292d1251a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -9,6 +9,7 @@ #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" #define TAP_NAME "tap0" +#define DUMMY_NAME "dum0" #define TAP_NETNS "xdp_context_tuntap" #define TEST_PAYLOAD_LEN 32 @@ -96,9 +97,7 @@ void test_xdp_context_test_run(void) /* Meta data must be 255 bytes or smaller */ test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); - /* Total size of data must match data_end - data_meta */ - test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), - sizeof(data) - 1, 0, 0, 0); + /* Total size of data must be data_end - data_meta or larger */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data) + 1, 0, 0, 0); @@ -156,15 +155,30 @@ err: return -1; } -static void assert_test_result(struct test_xdp_meta *skel) +static int write_test_packet(int tap_fd) +{ + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int n; + + /* The ethernet header doesn't need to be valid for this test */ + memset(packet, 0, sizeof(struct ethhdr)); + memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(n, sizeof(packet), "write packet")) + return -1; + + return 0; +} + +static void assert_test_result(const struct bpf_map *result_map) { int err; __u32 map_key = 0; __u8 map_value[TEST_PAYLOAD_LEN]; - err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, - sizeof(map_key), &map_value, - TEST_PAYLOAD_LEN, BPF_ANY); + err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), + &map_value, TEST_PAYLOAD_LEN, BPF_ANY); if (!ASSERT_OK(err, "lookup test_result")) return; @@ -172,6 +186,18 @@ static void assert_test_result(struct test_xdp_meta *skel) "test_result map contains test payload"); } +static bool clear_test_result(struct bpf_map *result_map) +{ + const __u8 v[sizeof(test_payload)] = {}; + const __u32 k = 0; + int err; + + err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); + ASSERT_OK(err, "update test_result"); + + return err == 0; +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -248,7 +274,7 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: close_netns(nstoken); @@ -257,17 +283,21 @@ close: netns_free(tx_ns); } -void test_xdp_context_tuntap(void) +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, + struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - struct test_xdp_meta *skel = NULL; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; int ret; + if (!clear_test_result(result_map)) + return; + ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) return; @@ -278,10 +308,6 @@ void test_xdp_context_tuntap(void) SYS(close, "ip link set dev " TAP_NAME " up"); - skel = test_xdp_meta__open_and_load(); - if (!ASSERT_OK_PTR(skel, "open and load skeleton")) - goto close; - tap_ifindex = if_nametoindex(TAP_NAME); if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) goto close; @@ -291,33 +317,175 @@ void test_xdp_context_tuntap(void) if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; - ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; - memcpy(packet, ð, sizeof(eth)); - memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + assert_test_result(result_map); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); +} + +/* Write a packet to a tap dev and copy it to ingress of a dummy dev */ +static void test_tuntap_mirred(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + bool *test_pass) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + int dummy_ifindex; + int tap_fd = -1; + int tap_ifindex; + int ret; + + *test_pass = false; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + return; - ret = write(tap_fd, packet, sizeof(packet)); - if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) + /* Setup dummy interface */ + SYS(close, "ip link add name " DUMMY_NAME " type dummy"); + SYS(close, "ip link set dev " DUMMY_NAME " up"); + + dummy_ifindex = if_nametoindex(DUMMY_NAME); + if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex")) goto close; - assert_test_result(skel); + tc_hook.ifindex = dummy_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + /* Setup TAP interface */ + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* Copy all packets received from TAP to dummy ingress */ + SYS(close, "tc qdisc add dev " TAP_NAME " clsact"); + SYS(close, "tc filter add dev " TAP_NAME " ingress " + "protocol all matchall " + "action mirred ingress mirror dev " DUMMY_NAME); + + /* Receive a packet on TAP */ + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; + + ASSERT_TRUE(*test_pass, "test_pass"); close: if (tap_fd >= 0) close(tap_fd); - test_xdp_meta__destroy(skel); netns_free(ns); } + +void test_xdp_context_tuntap(void) +{ + struct test_xdp_meta *skel = NULL; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("data_meta")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_read")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_read, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_slice")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_slice, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_write")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_write, + skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice_rdwr")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_slice_rdwr, + skel->progs.ing_cls_dynptr_slice, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset_oob")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_offset_oob, + skel->progs.ing_cls, + skel->maps.test_result); + if (test__start_subtest("clone_data_meta_empty_on_data_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_data_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_meta_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_data_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_meta_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + &skel->bss->test_pass); + + test_xdp_meta__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 461ab18705d5..a8ab05216c38 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -7,6 +7,7 @@ #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_devmap_tailcall.skel.h" #include "test_xdp_with_devmap_frags_helpers.skel.h" #include "test_xdp_with_devmap_helpers.skel.h" @@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void) } } +static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev, + enum bpf_attach_type prog_tail, + bool expect_reject) +{ + struct test_xdp_devmap_tailcall *skel; + int err; + + skel = test_xdp_devmap_tailcall__open(); + if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open")) + return; + + bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev); + bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail); + + err = test_xdp_devmap_tailcall__load(skel); + if (expect_reject) + ASSERT_ERR(err, "test_xdp_devmap_tailcall__load"); + else + ASSERT_OK(err, "test_xdp_devmap_tailcall__load"); + + test_xdp_devmap_tailcall__destroy(skel); +} + static void test_xdp_with_devmap_frags_helpers(void) { struct test_xdp_with_devmap_frags_helpers *skel; @@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void) if (test__start_subtest("DEVMAP with frags programs in entries")) test_xdp_with_devmap_frags_helpers(); - if (test__start_subtest("Verifier check of DEVMAP programs")) + if (test__start_subtest("Verifier check of DEVMAP programs")) { test_neg_xdp_devmap_helpers(); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false); + test_xdp_devmap_tailcall(0, 0, true); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true); + test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true); + } if (test__start_subtest("DEVMAP with programs in entries on veth")) test_xdp_with_devmap_helpers_veth(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c new file mode 100644 index 000000000000..efa350d04ec5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_xdp_pull_data.skel.h" + +#define PULL_MAX (1 << 31) +#define PULL_PLUS_ONE (1 << 30) + +#define XDP_PACKET_HEADROOM 256 + +/* Find headroom and tailroom occupied by struct xdp_frame and struct + * skb_shared_info so that we can calculate the maximum pull lengths for + * test cases. They might not be the real size of the structures due to + * cache alignment. + */ +static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(frame_sz, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return -ENOMEM; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = frame_sz; + topts.data_size_out = frame_sz; + /* Pass a data_end larger than the linear space available to make sure + * bpf_prog_test_run_xdp() will fill the linear data area so that + * xdp_find_sizes can infer the size of struct skb_shared_info + */ + ctx.data_end = frame_sz; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + free(buf); + + return err; +} + +/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024] + * so caller expecting XDP_PASS should always pass pull_len no less than 1024 + */ +static void run_test(struct test_xdp_pull_data *skel, int retval, + int frame_sz, int buff_len, int meta_len, int data_len, + int pull_len) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(buff_len, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return; + + buf[meta_len + 1023] = 0xaa; + buf[meta_len + 1024] = 0xbb; + buf[meta_len + 1025] = 0xcc; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = buff_len; + topts.data_size_out = buff_len; + ctx.data = meta_len; + ctx.data_end = meta_len + data_len; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + skel->bss->data_len = data_len; + if (pull_len & PULL_MAX) { + int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz; + int tailroom = frame_sz - XDP_PACKET_HEADROOM - + data_len - skel->bss->sinfo_sz; + + pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0; + pull_len += headroom + tailroom + data_len; + } + skel->bss->pull_len = pull_len; + + prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval"); + + if (retval == XDP_DROP) + goto out; + + ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size"); + ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size"); + /* Make sure data around xdp->data_end was not messed up by + * bpf_xdp_pull_data() + */ + ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]"); + ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]"); + ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]"); +out: + free(buf); +} + +static void test_xdp_pull_data_basic(void) +{ + u32 pg_sz, max_meta_len, max_data_len; + struct test_xdp_pull_data *skel; + + skel = test_xdp_pull_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load")) + return; + + pg_sz = sysconf(_SC_PAGE_SIZE); + + if (find_xdp_sizes(skel, pg_sz)) + goto out; + + max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz; + max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz; + + /* linear xdp pkt, pull 0 byte */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048); + + /* multi-buf pkt, pull results in linear xdp pkt */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048); + + /* multi-buf pkt, pull 1 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025); + + /* multi-buf pkt, pull 0 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025); + + /* multi-buf pkt, empty linear data area, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX); + + /* multi-buf pkt, no tailroom, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX); + + /* Test cases with invalid pull length */ + + /* linear xdp pkt, pull more than total data len */ + run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049); + + /* multi-buf pkt with no space left in linear data area */ + run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, empty linear data area */ + run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no tailroom */ + run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + +out: + test_xdp_pull_data__destroy(skel); +} + +void test_xdp_pull_data(void) +{ + if (test__start_subtest("xdp_pull_data")) + test_xdp_pull_data_basic(); +} diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index a52feff98112..d1841aac94a2 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -28,7 +28,8 @@ bool skip_all_tests = true; #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; #else bool skip_lacq_srel_tests = true; @@ -314,7 +315,8 @@ int load_acquire(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ { asm volatile ( \ @@ -365,7 +367,8 @@ int store_release(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ { asm volatile ( \ diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c index c4500c37f85e..086b57a426cf 100644 --- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c @@ -37,8 +37,11 @@ int prog(void *ctx) #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) unsigned long flags; - if ((ret = arena_spin_lock_irqsave(&lock, flags))) + if ((ret = arena_spin_lock_irqsave(&lock, flags))) { + if (ret == -EOPNOTSUPP) + test_skip = 3; return ret; + } if (counter != limit) counter++; bpf_repeat(cs_count); diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h index d67466c1ff77..f90531cf3ee5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h @@ -302,7 +302,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val * barriers. */ if (val & _Q_LOCKED_MASK) - smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); + (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err); /* * take ownership and clear the pending bit. @@ -380,7 +380,7 @@ queue: /* Link @node into the waitqueue. */ WRITE_ONCE(prev->next, node); - arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); + (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err); /* * While waiting for the MCS lock, the next pointer may have diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c index 1654a530aa3d..4e51785e7606 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -101,7 +101,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, tp->snd_cwnd = pkts_in_flight + sndcnt; } -/* Decide wheather to run the increase function of congestion control. */ +/* Decide whether to run the increase function of congestion control. */ static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { if (tcp_sk(sk)->reordering > TCP_REORDERING) diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 7cd73e75f52a..32c511bcd60b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ -/* WARNING: This implemenation is not necessarily the same +/* WARNING: This implementation is not necessarily the same * as the tcp_dctcp.c. The purpose is mainly for testing * the kernel BPF logic. */ diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index ffbd4b116d17..23b2aa2604de 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 47ff7754f4fd..c48b05aa2a4b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); - + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index c1cfd297aabf..a7a1a684eed1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -33,7 +33,20 @@ * e.g. "foo{{[0-9]+}}" matches strings like "foo007". * Extended POSIX regular expression syntax is allowed * inside the brackets. + * __not_msg Message not expected to be found in verifier log. + * If __msg_not is situated between __msg tags + * framework matches __msg tags first, and then + * checks that __msg_not is not present in a portion of + * a log between bracketing __msg tags. + * Same regex syntax as for __msg is supported. * __msg_unpriv Same as __msg but for unprivileged mode. + * __not_msg_unpriv Same as __not_msg but for unprivileged mode. + * + * __stderr Message expected to be found in bpf stderr stream. The + * same regex rules apply like __msg. + * __stderr_unpriv Same as __stderr but for unpriveleged mode. + * __stdout Same as __stderr but for stdout stream. + * __stdout_unpriv Same as __stdout but for unpriveleged mode. * * __xlated Expect a line in a disassembly log after verifier applies rewrites. * Multiple __xlated attributes could be specified. @@ -115,12 +128,14 @@ * __caps_unpriv Specify the capabilities that should be set when running the test. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) +#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg))) #define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg))) #define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc))) #define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg))) #define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv"))) @@ -136,9 +151,14 @@ #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") +#define __arch_s390x __arch("s390x") #define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) #define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited"))) #define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited"))) +#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg))) +#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg))) +#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg))) /* Define common capabilities tested using __caps_unpriv */ #define CAP_NET_ADMIN 12 @@ -156,6 +176,10 @@ #define __imm_ptr(name) [name]"r"(&name) #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + /* Magic constants used with __retval() */ #define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h new file mode 100644 index 000000000000..f4e67b492dd2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_TEST_UTILS_H__ +#define __BPF_TEST_UTILS_H__ + +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* Clobber as many native registers and stack slots as possible. */ +static __always_inline void clobber_regs_stack(void) +{ + char tmp_str[] = "123456789"; + unsigned long tmp; + + bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp); + __sink(tmp); +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c index 092db1d0435e..88e13e17ec9e 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -73,7 +73,7 @@ int BPF_PROG(use_css_iter_non_sleepable) } SEC("lsm.s/socket_connect") -__failure __msg("expected an RCU CS") +__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection") int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) { u64 cgrp_id = bpf_get_current_cgroup_id(); diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 5354455a01be..02d8f160ca0e 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) return 0; } + +SEC("syscall") +int test_cgrp_from_id_ns(void *ctx) +{ + struct cgroup *cg; + + cg = bpf_cgroup_from_id(1); + if (!cg) + return 42; + bpf_cgroup_release(cg); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index 645be6cddf36..dfd8a258f14a 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -14,7 +14,7 @@ unsigned char key[256] = {}; u16 udp_test_port = 7777; u32 authsize, key_len; char algo[128] = {}; -char dst[16] = {}; +char dst[16] = {}, dst_bad[8] = {}; int status; static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) @@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx) .authsize = authsize, }; struct bpf_crypto_ctx *cctx; - int err = 0; + int err; status = 0; - if (key_len > 256) { status = -EINVAL; return 0; @@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx) __builtin_memcpy(¶ms.algo, algo, sizeof(algo)); __builtin_memcpy(¶ms.key, key, sizeof(key)); - cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); if (!cctx) { status = err; return 0; @@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx) err = crypto_ctx_insert(cctx); if (err && err != -EEXIST) status = err; - return 0; } @@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb) struct bpf_dynptr psrc, pdst; int err; + status = 0; err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } @@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb) int err; status = 0; - err = skb_dynptr_validate(skb, &psrc); if (err < 0) { status = err; @@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb) return TC_ACT_SHOT; } - /* dst is a global variable to make testing part easier to check. In real - * production code, a percpu map should be used to store the result. + /* Check also bad case where the dst buffer is smaller than the + * skb's linear section. + */ + bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); + if (!status) + status = -EIO; + if (status != -EINVAL) + goto err; + + /* dst is a global variable to make testing part easier to check. + * In real production code, a percpu map should be used to store + * the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); - +err: return TC_ACT_SHOT; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index bd8f15229f5c..dda6a8dada82 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb) return SK_PASS; } +/* A metadata slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *(md + 1) = 42; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb) return SK_PASS; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem") +int skb_meta_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *d; + + return SK_PASS; +} + +/* Read-write skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *d = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb data slice */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") @@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx) return 0; } +/* Only supported prog type can create skb_meta-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed") +int skb_meta_invalid_ctx(void *ctx) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + + return 0; +} + SEC("fentry/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fentry, void *skb) @@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb) return 0; } +/* A skb clone's metadata slice becomes invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr clone, meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + bpf_dynptr_clone(&meta, &clone); + md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return 0; +} + /* A xdp clone's data slices should be invalid anytime packet data changes */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 8315273cb900..127dea342e5a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } +SEC("?tc") +int test_dynptr_skb_meta_data(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + /* This should return NULL. Must use bpf_dynptr_slice API */ + err = 2; + md = bpf_dynptr_data(&meta, 0, sizeof(*md)); + if (md) + return 1; + + err = 0; + return 1; +} + +/* Check that skb metadata dynptr ops don't accept any flags. */ +SEC("?tc") +int test_dynptr_skb_meta_flags(struct __sk_buff *skb) +{ + const __u64 INVALID_FLAGS = ~0ULL; + struct bpf_dynptr meta; + __u8 buf; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta); + if (ret != -EINVAL) + return 1; + + err = 2; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 3; + ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 4; + ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + SEC("tp/syscalls/sys_enter_nanosleep") int test_adjust(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 5e0a1ca96d4e..a01c2736890f 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,43 +18,43 @@ return *(u64 *)num; \ } -__msg(": R0_w=0xffffffff80000000") +__msg(": R0=0xffffffff80000000") check_assert(s64, ==, eq_int_min, INT_MIN); -__msg(": R0_w=0x7fffffff") +__msg(": R0=0x7fffffff") check_assert(s64, ==, eq_int_max, INT_MAX); -__msg(": R0_w=0") +__msg(": R0=0") check_assert(s64, ==, eq_zero, 0); -__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000") +__msg(": R0=0x8000000000000000 R1=0x8000000000000000") check_assert(s64, ==, eq_llong_min, LLONG_MIN); -__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff") +__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff") check_assert(s64, ==, eq_llong_max, LLONG_MAX); -__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)") +__msg(": R0=scalar(id=1,smax=0x7ffffffe)") check_assert(s64, <, lt_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, <, lt_zero, 0); -__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff") +__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff") check_assert(s64, <, lt_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smax=0x7fffffff)") +__msg(": R0=scalar(id=1,smax=0x7fffffff)") check_assert(s64, <=, le_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smax=0)") +__msg(": R0=scalar(id=1,smax=0)") check_assert(s64, <=, le_zero, 0); -__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000") +__msg(": R0=scalar(id=1,smax=0xffffffff80000000") check_assert(s64, <=, le_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_zero, 0); -__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001") +__msg(": R0=scalar(id=1,smin=0xffffffff80000001") check_assert(s64, >, gt_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_zero, 0); -__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000") +__msg(": R0=scalar(id=1,smin=0xffffffff80000000") check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c index 544e5ac90461..d09bbd8ae8a8 100644 --- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -12,7 +12,7 @@ SEC("freplace/connect_v4_prog") int new_connect_v4_prog(struct bpf_sock_addr *ctx) { - // return value thats in invalid range + // return value that's in invalid range return 255; } diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index f41257eadbb2..d273b46dfc7c 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") int create_and_destroy(void *ctx) { struct bpf_iter_num iter; @@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") int valid_stack_reuse(void *ctx) { struct bpf_iter_num iter; @@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void) "r3 = 1000;" "call %[bpf_iter_num_new];" - /* attemp to leak bpf_iter_num state */ + /* attempt to leak bpf_iter_num state */ "r7 = *(u64 *)(r6 + 0);" "r8 = *(u64 *)(r6 + 8);" diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c index 6b1588d70652..fe3663dedbe1 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_failure.c +++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c @@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -__failure __msg("expected an RCU CS when using bpf_iter_task_next") +__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection") int BPF_PROG(iter_tasks_without_lock) { struct task_struct *pos; @@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock) } SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -__failure __msg("expected an RCU CS when using bpf_iter_css_next") +__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection") int BPF_PROG(iter_css_without_lock) { u64 cg_id = bpf_get_current_cgroup_id(); diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c index 9e4b45201e69..5379e9960ffd 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod.c @@ -123,3 +123,49 @@ out: bpf_iter_num_destroy(&num_it); return 0; } + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection") +int iter_ret_rcu_test_protected(const void *ctx) +{ + struct task_struct *p; + + p = bpf_kfunc_ret_rcu_test(); + return p->pid; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("R1 type=rcu_ptr_or_null_ expected=") +int iter_ret_rcu_test_type(const void *ctx) +{ + struct task_struct *p; + + bpf_rcu_read_lock(); + p = bpf_kfunc_ret_rcu_test(); + bpf_this_cpu_ptr(p); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection") +int iter_ret_rcu_test_protected_nostruct(const void *ctx) +{ + void *p; + + p = bpf_kfunc_ret_rcu_test_nostruct(4); + return *(int *)p; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=") +int iter_ret_rcu_test_type_nostruct(const void *ctx) +{ + void *p; + + bpf_rcu_read_lock(); + p = bpf_kfunc_ret_rcu_test_nostruct(4); + bpf_this_cpu_ptr(p); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 6543d5b6e0a9..83791348bed5 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -20,7 +20,7 @@ __s64 res_empty; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_empty(const void *ctx) @@ -38,7 +38,7 @@ __s64 res_full; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_full(const void *ctx) @@ -58,7 +58,7 @@ static volatile int zero = 0; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_truncated(const void *ctx) diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c new file mode 100644 index 000000000000..f77aef0474d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +SEC("kprobe") +int kprobe_write_ctx(struct pt_regs *ctx) +{ + ctx->ax = 0; + return 0; +} + +SEC("kprobe.multi") +int kprobe_multi_write_ctx(struct pt_regs *ctx) +{ + ctx->ax = 0; + return 0; +} +#endif diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index 6438982b928b..ddd26d1a083f 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx) SEC("?tc") int obj_new_no_struct(void *ctx) { - - bpf_obj_new(union { int data; unsigned udata; }); + (void)bpf_obj_new(union { int data; unsigned udata; }); return 0; } @@ -252,7 +251,7 @@ int new_null_ret(void *ctx) SEC("?tc") int obj_new_acq(void *ctx) { - bpf_obj_new(struct foo); + (void)bpf_obj_new(struct foo); return 0; } diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index 50e66772c046..b0fa26fb4760 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 947bb7e988c2..0227409d4b0e 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 717dab14322b..5d1c9a775e6b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c index e4ff97fbcce1..dd36aff4fba3 100644 --- a/tools/testing/selftests/bpf/progs/loop6.c +++ b/tools/testing/selftests/bpf/progs/loop6.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/ptrace.h> -#include <stddef.h> -#include <linux/bpf.h> +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" @@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL"; #define SG_CHAIN 0x01UL #define SG_END 0x02UL -struct scatterlist { - unsigned long page_link; - unsigned int offset; - unsigned int length; -}; - #define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) #define sg_is_last(sg) ((sg)->page_link & SG_END) #define sg_chain_ptr(sg) \ @@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i) return sgp; } -int config = 0; +int run_once = 0; int result = 0; SEC("kprobe/virtqueue_add_sgs") @@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, __u64 length1 = 0, length2 = 0; unsigned int i, n, len; - if (config != 0) + if (run_once != 0) return 0; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) { __sink(out_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { - bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + len = BPF_CORE_READ(sgp, length); length1 += len; n++; } @@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, __sink(in_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { - bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + len = BPF_CORE_READ(sgp, length); length2 += len; n++; } } - config = 1; + run_once = 1; result = length2 - length1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h new file mode 100644 index 000000000000..76aa5821807f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __PROGS_LPM_TRIE_H +#define __PROGS_LPM_TRIE_H + +struct trie_key { + __u32 prefixlen; + __u32 data; +}; + +/* Benchmark operations */ +enum { + LPM_OP_NOOP = 0, + LPM_OP_BASELINE, + LPM_OP_LOOKUP, + LPM_OP_INSERT, + LPM_OP_UPDATE, + LPM_OP_DELETE, + LPM_OP_FREE +}; + +/* + * Return values from run_bench. + * + * Negative values are also allowed and represent kernel error codes. + */ +#define LPM_BENCH_SUCCESS 0 +#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */ + +#endif diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c new file mode 100644 index 000000000000..a0e6ebd5507a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Cloudflare */ + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "bpf_atomic.h" +#include "progs/lpm_trie.h" + +#define BPF_OBJ_NAME_LEN 16U +#define MAX_ENTRIES 100000000 +#define NR_LOOPS 10000 + +char _license[] SEC("license") = "GPL"; + +/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */ +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct trie_key); + __type(value, __u32); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); +} trie_map SEC(".maps"); + +long hits; +long duration_ns; + +/* Configured from userspace */ +__u32 nr_entries; +__u32 prefixlen; +bool random; +__u8 op; + +static __u64 latency_free_start; + +SEC("fentry/bpf_map_free_deferred") +int BPF_PROG(trie_free_entry, struct work_struct *work) +{ + struct bpf_map *map = container_of(work, struct bpf_map, work); + char name[BPF_OBJ_NAME_LEN]; + u32 map_type; + + map_type = BPF_CORE_READ(map, map_type); + if (map_type != BPF_MAP_TYPE_LPM_TRIE) + return 0; + + /* + * Ideally we'd have access to the map ID but that's already + * freed before we enter trie_free(). + */ + BPF_CORE_READ_STR_INTO(&name, map, name); + if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map")) + return 0; + + latency_free_start = bpf_ktime_get_ns(); + + return 0; +} + +SEC("fexit/bpf_map_free_deferred") +int BPF_PROG(trie_free_exit, struct work_struct *work) +{ + __u64 val; + + if (!latency_free_start) + return 0; + + val = bpf_ktime_get_ns() - latency_free_start; + latency_free_start = 0; + + __sync_add_and_fetch(&duration_ns, val); + __sync_add_and_fetch(&hits, 1); + + return 0; +} + +static __u32 cur_key; + +static __always_inline void generate_key(struct trie_key *key) +{ + key->prefixlen = prefixlen; + + if (random) + key->data = bpf_get_prandom_u32() % nr_entries; + else + key->data = cur_key++ % nr_entries; +} + +static int noop(__u32 index, __u32 *unused) +{ + return 0; +} + +static int baseline(__u32 index, __u32 *unused) +{ + struct trie_key key; + __u32 blackbox = 0; + + generate_key(&key); + /* Avoid compiler optimizing out the modulo */ + barrier_var(blackbox); + blackbox = READ_ONCE(key.data); + + return 0; +} + +static int lookup(__u32 index, int *retval) +{ + struct trie_key key; + + generate_key(&key); + if (!bpf_map_lookup_elem(&trie_map, &key)) { + *retval = -ENOENT; + return 1; + } + + return 0; +} + +static int insert(__u32 index, int *retval) +{ + struct trie_key key; + u32 val = 1; + int err; + + generate_key(&key); + err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST); + if (err) { + *retval = err; + return 1; + } + + /* Is this the last entry? */ + if (key.data == nr_entries - 1) { + /* For atomicity concerns, see the comment in delete() */ + *retval = LPM_BENCH_REINIT_MAP; + return 1; + } + + return 0; +} + +static int update(__u32 index, int *retval) +{ + struct trie_key key; + u32 val = 1; + int err; + + generate_key(&key); + err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST); + if (err) { + *retval = err; + return 1; + } + + return 0; +} + +static int delete(__u32 index, int *retval) +{ + struct trie_key key; + int err; + + generate_key(&key); + err = bpf_map_delete_elem(&trie_map, &key); + if (err) { + *retval = err; + return 1; + } + + /* Do we need to refill the map? */ + if (key.data == nr_entries - 1) { + /* + * Atomicity isn't required because DELETE only supports + * one producer running concurrently. What we need is a + * way to track how many entries have been deleted from + * the trie between consecutive invocations of the BPF + * prog because a single bpf_loop() call might not + * delete all entries, e.g. when NR_LOOPS < nr_entries. + */ + *retval = LPM_BENCH_REINIT_MAP; + return 1; + } + + return 0; +} + +SEC("xdp") +int BPF_PROG(run_bench) +{ + int err = LPM_BENCH_SUCCESS; + u64 start, delta; + int loops; + + start = bpf_ktime_get_ns(); + + switch (op) { + case LPM_OP_NOOP: + loops = bpf_loop(NR_LOOPS, noop, NULL, 0); + break; + case LPM_OP_BASELINE: + loops = bpf_loop(NR_LOOPS, baseline, NULL, 0); + break; + case LPM_OP_LOOKUP: + loops = bpf_loop(NR_LOOPS, lookup, &err, 0); + break; + case LPM_OP_INSERT: + loops = bpf_loop(NR_LOOPS, insert, &err, 0); + break; + case LPM_OP_UPDATE: + loops = bpf_loop(NR_LOOPS, update, &err, 0); + break; + case LPM_OP_DELETE: + loops = bpf_loop(NR_LOOPS, delete, &err, 0); + break; + default: + bpf_printk("invalid benchmark operation\n"); + return -1; + } + + delta = bpf_ktime_get_ns() - start; + + __sync_add_and_fetch(&duration_ns, delta); + __sync_add_and_fetch(&hits, loops); + + return err; +} diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c new file mode 100644 index 000000000000..6e60d686b664 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define MAX_ENTRIES 100000000 + +struct trie_key { + __u32 prefixlen; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct trie_key); + __type(value, __u32); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); +} trie_free_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c new file mode 100644 index 000000000000..d461684728e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_excl.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Google LLC. */ +#include <linux/bpf.h> +#include <time.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} excl_map SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int should_have_access(void *ctx) +{ + int key = 0, value = 0xdeadbeef; + + bpf_map_update_elem(&excl_map, &key, &value, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int should_not_have_access(void *ctx) +{ + int key = 0, value = 0xdeadbeef; + + bpf_map_update_elem(&excl_map, &key, &value, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c index 4f94c971ae86..3b984b6ae7c0 100644 --- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -8,8 +8,8 @@ SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)") -__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar") +__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)") +__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar") int btf_id_to_ptr_mem(void *ctx) { struct task_struct *task; diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c index 70302477e326..41389e579578 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -117,7 +117,7 @@ int _getsockopt_subflow(struct bpf_sockopt *ctx) return 1; msk = bpf_core_cast(sk, struct mptcp_sock); - if (msk->pm.subflows != 1) { + if (msk->pm.extra_subflows != 1) { ctx->retval = -1; return 1; } diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c index 098ef970fac1..b05565d1db0d 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_search.c +++ b/tools/testing/selftests/bpf/progs/rbtree_search.c @@ -183,7 +183,7 @@ long test_##op##_spinlock_##dolock(void *ctx) \ } /* - * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG) + * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG) * to ensure the message itself is not in the bpf prog lineinfo * which the verifier includes in its log. * Otherwise, the test_loader will incorrectly match the prog lineinfo diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c index 47568007b668..0c77df05be7f 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c @@ -50,6 +50,7 @@ struct sched_switch_args { int next_prio; }; +__u32 stack_id; SEC("tracepoint/sched/sched_switch") int oncpu(struct sched_switch_args *ctx) { @@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(ctx, &stackmap, 0); if ((int)key >= 0) { + stack_id = key; bpf_map_update_elem(&stackid_hmap, &key, &val, 0); stack_p = bpf_map_lookup_elem(&stack_amap, &key); if (stack_p) diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c index 35790897dc87..4a5bd852f10c 100644 --- a/tools/testing/selftests/bpf/progs/stream.c +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_arena_common.h" struct arr_elem { struct bpf_res_spin_lock lock; @@ -17,10 +18,29 @@ struct { __type(value, struct arr_elem); } arrmap SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); /* number of pages */ +} arena SEC(".maps"); + +struct elem { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + #define ENOSPC 28 #define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" int size; +u64 fault_addr; +void *arena_ptr; SEC("syscall") __success __retval(0) @@ -37,7 +57,15 @@ int stream_exhaust(void *ctx) } SEC("syscall") +__arch_x86_64 +__arch_arm64 +__arch_s390x __success __retval(0) +__stderr("ERROR: Timeout detected for may_goto instruction") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") int stream_cond_break(void *ctx) { while (can_loop) @@ -47,6 +75,15 @@ int stream_cond_break(void *ctx) SEC("syscall") __success __retval(0) +__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock") +__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n" +"Total held locks = 1\n" +"Held lock\\[ 0\\] = \\1}}") +__stderr("...") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") int stream_deadlock(void *ctx) { struct bpf_res_spin_lock *lock, *nlock; @@ -76,4 +113,125 @@ int stream_syscall(void *ctx) return 0; } +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_write_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + /* Prevent GCC bounds warning: casting &arena to struct bpf_arena * + * triggers bounds checking since the map definition is smaller than struct + * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the + * bounds analysis + */ + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + fault_addr = user_vm_start + 0x7fff; + bpf_addr_space_cast(user_vm_start, 0, 1); + asm volatile ( + "r1 = %0;" + "r2 = 1;" + "*(u32 *)(r1 + 0x7fff) = r2;" + : + : "r" (user_vm_start) + : "r1", "r2" + ); + return 0; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_read_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + /* Prevent GCC bounds warning: casting &arena to struct bpf_arena * + * triggers bounds checking since the map definition is smaller than struct + * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the + * bounds analysis + */ + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + fault_addr = user_vm_start + 0x7fff; + bpf_addr_space_cast(user_vm_start, 0, 1); + asm volatile ( + "r1 = %0;" + "r1 = *(u32 *)(r1 + 0x7fff);" + : + : "r" (user_vm_start) + : "r1" + ); + return 0; +} + +static __noinline void subprog(void) +{ + int __arena *addr = (int __arena *)0xdeadbeef; + + arena_ptr = &arena; + *addr = 1; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_subprog_fault(void *ctx) +{ + subprog(); + return 0; +} + +static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + int __arena *addr = (int __arena *)0xdeadbeef; + + arena_ptr = &arena; + *addr = 1; + return 0; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_callback_fault(void *ctx) +{ + struct bpf_timer *arr_timer; + + arr_timer = bpf_map_lookup_elem(&array, &(int){0}); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb); + bpf_timer_start(arr_timer, 0, 0); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 53af438bd998..99d72c68f76a 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1; /* Passing NULL to string kfuncs (treated as a userspace ptr) */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } @@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } @@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re /* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c index 89fb4669b0e9..e41cc5601994 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -7,6 +7,7 @@ char long_str[XATTR_SIZE_MAX + 1]; SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); } SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 46697f381878..2e3498e37b9c 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -12,6 +12,11 @@ char str[] = "hello world"; /* Functional tests */ __test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } __test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); } +__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); } +__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); } +__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); } +__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); } __test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } __test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } __test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } @@ -30,8 +35,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } -__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } -__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); } +__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); } +__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); } +__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); } __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c new file mode 100644 index 000000000000..ad8bb546c9bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP1_MAGIC 1234 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP1_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c new file mode 100644 index 000000000000..cea1a2f4b62f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP2_MAGIC 4567 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP2_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c index 36386b3c23a1..2b98b7710816 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c @@ -9,7 +9,7 @@ void bpf_task_release(struct task_struct *p) __ksym; /* This test struct_ops BPF programs returning referenced kptr. The verifier should * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task - * here is acquried automatically as the task argument is tagged with "__ref". + * here is acquired automatically as the task argument is tagged with "__ref". */ SEC("struct_ops/test_return_ref_kptr") struct task_struct *BPF_PROG(kptr_return, int dummy, diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c index 76dcb6089d7f..9c0a65466356 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c @@ -9,7 +9,7 @@ __attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __k /* This is a test BPF program that uses struct_ops to access a referenced * kptr argument. This is a test for the verifier to ensure that it - * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and + * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and * 2) the same reference can be acquired from multiple paths as long as it * has not been released. */ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c index 327ca395e860..d556b19413d7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb) { int ret = 1; + clobber_regs_stack(); + count++; subprog_tail(skb); subprog_tail(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c index 72fd0d577506..ae94c9c70ab7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); int classifier_1(struct __sk_buff *skb); @@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + subprog_tail0(skb); subprog_tail1(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c index a7fb91cb05b7..56b6b0099840 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); @@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + bpf_tail_call_static(skb, &jmp_table0, 0); __sink(ret); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c index c87f9ca982d3..5261395713cd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int subprog_tail(void *ctx) SEC("fentry/dummy") int BPF_PROG(fentry, struct sk_buff *skb) { + clobber_regs_stack(); + count++; subprog_tail(ctx); subprog_tail(ctx); diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h new file mode 100644 index 000000000000..432fff2af844 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_BPF_H +#define __TASK_LOCAL_DATA_BPF_H + +/* + * Task local data is a library that facilitates sharing per-task data + * between user space and bpf programs. + * + * + * USAGE + * + * A TLD, an entry of data in task local data, first needs to be created by the + * user space. This is done by calling user space API, TLD_DEFINE_KEY() or + * tld_create_key(), with the name of the TLD and the size. + * + * TLD_DEFINE_KEY(prio, "priority", sizeof(int)); + * + * or + * + * void func_call(...) { + * tld_key_t prio, in_cs; + * + * prio = tld_create_key("priority", sizeof(int)); + * in_cs = tld_create_key("in_critical_section", sizeof(bool)); + * ... + * + * A key associated with the TLD, which has an opaque type tld_key_t, will be + * initialized or returned. It can be used to get a pointer to the TLD in the + * user space by calling tld_get_data(). + * + * In a bpf program, tld_object_init() first needs to be called to initialized a + * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data(). + * The API will try to fetch the key by the name and use it to locate the data. + * A pointer to the TLD will be returned. It also caches the key in a task local + * storage map, tld_key_map, whose value type, struct tld_keys, must be defined + * by the developer. + * + * struct tld_keys { + * tld_key_t prio; + * tld_key_t in_cs; + * }; + * + * SEC("struct_ops") + * void prog(struct task_struct task, ...) + * { + * struct tld_object tld_obj; + * int err, *p; + * + * err = tld_object_init(task, &tld_obj); + * if (err) + * return; + * + * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int)); + * if (p) + * // do something depending on *p + */ +#include <errno.h> +#include <bpf/bpf_helpers.h> + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifndef TLD_KEY_MAP_CREATE_RETRY +#define TLD_KEY_MAP_CREATE_RETRY 10 +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + __u16 size; +}; + +struct tld_meta_u { + __u8 cnt; + __u16 size; + struct tld_metadata metadata[TLD_MAX_DATA_CNT]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[__PAGE_SIZE - sizeof(__u64)]; +}; + +struct tld_map_value { + struct tld_data_u __uptr *data; + struct tld_meta_u __uptr *meta; +}; + +typedef struct tld_uptr_dummy { + struct tld_data_u data[0]; + struct tld_meta_u meta[0]; +} *tld_uptr_dummy_t; + +struct tld_object { + struct tld_map_value *data_map; + struct tld_keys *key_map; + /* + * Force the compiler to generate the actual definition of tld_meta_u + * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will + * be BTF_KIND_FWD. + */ + tld_uptr_dummy_t dummy[0]; +}; + +/* + * Map value of tld_key_map for caching keys. Must be defined by the developer. + * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key(). + */ +struct tld_keys; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_map_value); +} tld_data_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_keys); +} tld_key_map SEC(".maps"); + +/** + * tld_object_init() - Initialize a tld_object. + * + * @task: The task_struct of the target task + * @tld_obj: A pointer to a tld_object to be initialized + * + * Return 0 on success; -ENODATA if the user space did not initialize task local data + * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map + * fails + */ +__attribute__((unused)) +static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj) +{ + int i; + + tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0); + if (!tld_obj->data_map) + return -ENODATA; + + bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) { + tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (likely(tld_obj->key_map)) + break; + } + if (!tld_obj->key_map) + return -ENOMEM; + + return 0; +} + +/* + * Return the offset of TLD if @name is found. Otherwise, return the current TLD count + * using the nonpositive range so that the next tld_get_data() can skip fetching key if + * no new TLD is added or start comparing name from the first newly added TLD. + */ +__attribute__((unused)) +static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start) +{ + struct tld_metadata *metadata; + int i, cnt, start, off = 0; + + if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta) + return 0; + + start = tld_obj->data_map->data->start; + cnt = tld_obj->data_map->meta->cnt; + metadata = tld_obj->data_map->meta->metadata; + + bpf_for(i, 0, cnt) { + if (i >= TLD_MAX_DATA_CNT) + break; + + if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name)) + return start + off; + + off += TLD_ROUND_UP(metadata[i].size, 8); + } + + return -cnt; +} + +/** + * tld_get_data() - Retrieve a pointer to the TLD associated with the name. + * + * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init() + * @key: The cached key of the TLD in tld_key_map + * @name: The name of the key associated with a TLD + * @size: The size of the TLD. Must be a known constant value + * + * Return a pointer to the TLD associated with @name; NULL if not found or @size is too + * big. @key is used to cache the key if the TLD is found to speed up subsequent calls. + * It should be defined as an member of tld_keys of tld_key_t type by the developer. + */ +#define tld_get_data(tld_obj, key, name, size) \ + ({ \ + void *data = NULL, *_data = (tld_obj)->data_map->data; \ + long off = (tld_obj)->key_map->key.off; \ + int cnt; \ + \ + if (likely(_data)) { \ + if (likely(off > 0)) { \ + barrier_var(off); \ + if (likely(off < __PAGE_SIZE - size)) \ + data = _data + off; \ + } else { \ + cnt = -(off); \ + if (likely((tld_obj)->data_map->meta) && \ + cnt < (tld_obj)->data_map->meta->cnt) { \ + off = __tld_fetch_key(tld_obj, name, cnt); \ + (tld_obj)->key_map->key.off = off; \ + \ + if (likely(off < __PAGE_SIZE - size)) { \ + barrier_var(off); \ + if (off > 0) \ + data = _data + off; \ + } \ + } \ + } \ + } \ + data; \ + }) + +#endif diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c new file mode 100644 index 000000000000..23217f06a3ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +const void *user_ptr = NULL; + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} lrumap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + struct elem *work = value; + + bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0); + return 0; +} + +int key = 0; + +SEC("perf_event") +int oncpu_hash_map(struct pt_regs *args) +{ + struct elem empty_work = { .data = { 0 } }; + struct elem *work; + struct task_struct *task; + int err; + + task = bpf_get_current_task_btf(); + err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST); + if (err) + return 0; + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) + return 0; + + bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +int oncpu_array_map(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +int oncpu_lru_map(struct pt_regs *args) +{ + struct elem empty_work = { .data = { 0 } }; + struct elem *work; + struct task_struct *task; + int err; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&lrumap, &key); + if (work) + return 0; + err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST); + if (err) + return 0; + work = bpf_map_lookup_elem(&lrumap, &key); + if (!work || work->data[0]) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c new file mode 100644 index 000000000000..77fe8f28facd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +const void *user_ptr = NULL; + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + struct elem *work = value; + + bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0); + return 0; +} + +int key = 0; + +SEC("perf_event") +__failure __msg("doesn't match map pointer in R3") +int mismatch_map(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("arg#1 doesn't point to a map value") +int no_map_task_work(struct pt_regs *args) +{ + struct task_struct *task; + struct bpf_task_work tw; + + task = bpf_get_current_task_btf(); + bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("Possibly NULL pointer passed to trusted arg1") +int task_work_null(struct pt_regs *args) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("Possibly NULL pointer passed to trusted arg2") +int map_null(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c new file mode 100644 index 000000000000..90fca06fff56 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +#define ENTRIES 128 + +char _license[] SEC("license") = "GPL"; + +__u64 callback_scheduled = 0; +__u64 callback_success = 0; +__u64 schedule_error = 0; +__u64 delete_success = 0; + +struct elem { + __u32 count; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, ENTRIES); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + __sync_fetch_and_add(&callback_success, 1); + return 0; +} + +SEC("syscall") +int schedule_task_work(void *ctx) +{ + struct elem empty_work = {.count = 0}; + struct elem *work; + int key = 0, err; + + key = bpf_ktime_get_ns() % ENTRIES; + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) { + bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST); + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) + return 0; + } + err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); + if (err) + __sync_fetch_and_add(&schedule_error, 1); + else + __sync_fetch_and_add(&callback_scheduled, 1); + return 0; +} + +SEC("syscall") +int delete_task_work(void *ctx) +{ + int key = 0, err; + + key = bpf_get_prandom_u32() % ENTRIES; + err = bpf_map_delete_elem(&hmap, &key); + if (!err) + __sync_fetch_and_add(&delete_success, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index f344c6835e84..26a53e54b8fa 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,7 @@ #include "bpf_compiler.h" #include "test_cls_redirect.h" +#include "bpf_misc.h" #pragma GCC diagnostic ignored "-Waddress-of-packed-member" @@ -31,9 +32,6 @@ #define INLINING __always_inline #endif -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) - #define IP_OFFSET_MASK (0x1FFF) #define IP_MF (0x2000) @@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8))); typedef struct buf { struct __sk_buff *skb; net_ptr head; - /* NB: tail musn't have alignment other than 1, otherwise + /* NB: tail mustn't have alignment other than 1, otherwise * LLVM will go and eliminate code, e.g. when checking packet lengths. */ uint8_t *const tail; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index d0f7670351e5..dfd4a2710391 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header *offset += sizeof(*next_hop); - /* Skip the remainig next hops (may be zero). */ + /* Skip the remaining next hops (may be zero). */ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1); } diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index abb7344b531f..5edf3cdc213d 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ -#include <stdbool.h> -#include <stddef.h> -#include <linux/bpf.h> -#include <linux/ptrace.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c new file mode 100644 index 000000000000..c855f8f87eff --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap2 SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c new file mode 100644 index 000000000000..fffafc013044 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> + +#include "task_local_data.bpf.h" + +struct tld_keys { + tld_key_t value0; + tld_key_t value1; + tld_key_t value2; + tld_key_t value_not_exist; +}; + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +int test_value0; +int test_value1; +struct test_tld_struct test_value2; + +SEC("syscall") +int task_main(void *ctx) +{ + struct tld_object tld_obj; + struct test_tld_struct *struct_p; + struct task_struct *task; + int err, *int_p; + + task = bpf_get_current_task_btf(); + err = tld_object_init(task, &tld_obj); + if (err) + return 1; + + int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int)); + if (int_p) + test_value0 = *int_p; + else + return 2; + + int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int)); + if (int_p) + test_value1 = *int_p; + else + return 3; + + struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct)); + if (struct_p) + test_value2 = *struct_p; + else + return 4; + + int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int)); + if (int_p) + return 5; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 5f4e87ee949a..1ecdf4c54de4 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -14,10 +14,7 @@ #include <bpf/bpf_endian.h> #define BPF_PROG_TEST_TCP_HDR_OPTIONS #include "test_tcp_hdr_options.h" - -#ifndef sizeof_field -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#endif +#include "bpf_misc.h" __u8 test_kind = TCPOPT_EXP; __u16 test_magic = 0xeB9F; diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 540181c115a8..ef00d38b0a8d 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -23,7 +23,6 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(max_entries, 2); __type(key, int); __type(value, __u32); } perf_event_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c index 896c88a4960d..12f4065fca20 100644 --- a/tools/testing/selftests/bpf/progs/test_uprobe.c +++ b/tools/testing/selftests/bpf/progs/test_uprobe.c @@ -59,3 +59,41 @@ int BPF_UPROBE(test4) test4_result = 1; return 0; } + +#if defined(__TARGET_ARCH_x86) +struct pt_regs regs; + +SEC("uprobe") +int BPF_UPROBE(test_regs_change) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + ctx->ax = regs.ax; + ctx->cx = regs.cx; + ctx->dx = regs.dx; + ctx->r8 = regs.r8; + ctx->r9 = regs.r9; + ctx->r10 = regs.r10; + ctx->r11 = regs.r11; + ctx->di = regs.di; + ctx->si = regs.si; + return 0; +} + +unsigned long ip; + +SEC("uprobe") +int BPF_UPROBE(test_regs_change_ip) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + ctx->ip = ip; + return 0; +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c index 096488f47fbc..a78c87537b07 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt.c +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, return 0; } +int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +short usdt_sib_arg; +int usdt_sib_arg_size; + +/* + * usdt_sib is only tested on x86-related architectures, so it requires + * manual attach since auto-attach will panic tests under other architectures + */ +SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt_sib_called, 1); + + usdt_sib_cookie = bpf_usdt_cookie(ctx); + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp); + usdt_sib_arg = (short)tmp; + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c new file mode 100644 index 000000000000..814e2a980e97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +SEC("xdp") +int xdp_devmap(struct xdp_md *ctx) +{ + return ctx->egress_ifindex; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __array(values, int (void *)); +} xdp_map SEC(".maps") = { + .values = { + [0] = (void *)&xdp_devmap, + }, +}; + +SEC("xdp") +int xdp_entry(struct xdp_md *ctx) +{ + bpf_tail_call(ctx, &xdp_map, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fcf6ca14f2ea..d79cb74b571e 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,8 +1,11 @@ +#include <stdbool.h> #include <linux/bpf.h> +#include <linux/errno.h> #include <linux/if_ether.h> #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> +#include "bpf_kfuncs.h" #define META_SIZE 32 @@ -23,6 +26,8 @@ struct { __uint(value_size, META_SIZE); } test_result SEC(".maps"); +bool test_pass; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Read from metadata using bpf_dynptr_read helper */ +SEC("tc") +int ing_cls_dynptr_read(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + + return TC_ACT_SHOT; +} + +/* Write to metadata using bpf_dynptr_write helper */ +SEC("tc") +int ing_cls_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, META_SIZE, 0); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read from metadata using read-only dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_SHOT; +} + +/* Write to metadata using writeable dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src, *dst; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!dst) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read skb metadata in chunks from various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 chunk_len = META_SIZE / 4; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + /* 1. Regular read */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 2. Read from an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 3. Read at an offset */ + bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0); + dst += chunk_len; + + /* 4. Read from a slice starting at an offset */ + src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); + if (!src) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_SHOT; +} + +/* Write skb metadata in chunks at various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) +{ + const __u32 chunk_len = META_SIZE / 4; + __u8 payload[META_SIZE]; + struct bpf_dynptr meta; + __u8 *dst, *src; + + bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + src = payload; + + /* 1. Regular write */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 2. Write to an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 3. Write at an offset */ + bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0); + src += chunk_len; + + /* 4. Write to a slice starting at an offset */ + dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len); + if (!dst) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Pass an OOB offset to dynptr read, write, adjust, slice. */ +SEC("tc") +int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + __u8 md, *p; + int err; + + err = bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (err) + goto fail; + + /* read offset OOB */ + err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0); + if (err != -E2BIG) + goto fail; + + /* write offset OOB */ + err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0); + if (err != -E2BIG) + goto fail; + + /* adjust end offset OOB */ + err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* adjust start offset OOB */ + err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* slice offset OOB */ + p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + /* slice rdwr offset OOB */ + p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + return TC_ACT_UNSPEC; +fail: + return TC_ACT_SHOT; +} + +/* Reserve and clear space for metadata but don't populate it */ +SEC("xdp") +int ing_xdp_zalloc_meta(struct xdp_md *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *meta; + int ret; + + /* Drop any non-test packets */ + if (eth + 1 > ctx_ptr(ctx, data_end)) + return XDP_DROP; + if (eth->h_proto != 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); + if (ret < 0) + return XDP_DROP; + + meta = ctx_ptr(ctx, data_meta); + if (meta + META_SIZE > ctx_ptr(ctx, data)) + return XDP_DROP; + + __builtin_memset(meta, 0, META_SIZE); + + return XDP_PASS; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { @@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx) return XDP_PASS; } +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _payload_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + if (ctx->data_meta != ctx->data) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _metadata_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *md = ctx_ptr(ctx, data_meta); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + if (md + 1 > ctx_ptr(ctx, data)) { + /* Expect no metadata */ + test_pass = true; + } else { + /* Metadata write to trigger unclone in prologue */ + *md = 42; + } +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _payload_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _metadata_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + __u8 *md; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Metadata write to trigger unclone in prologue */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (md) + *md = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only before prog writes to packet payload + * using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata before unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Helper write to payload will unclone the packet */ + bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); + + /* Expect no metadata after unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only if prog writes to packet + * metadata using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Metadata write. Expect failure. */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c new file mode 100644 index 000000000000..c41a21413eaa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +int xdpf_sz; +int sinfo_sz; +int data_len; +int pull_len; + +#define XDP_PACKET_HEADROOM 256 + +SEC("xdp.frags") +int xdp_find_sizes(struct xdp_md *ctx) +{ + xdpf_sz = sizeof(struct xdp_frame); + sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM - + (ctx->data_end - ctx->data); + + return XDP_PASS; +} + +SEC("xdp.frags") +int xdp_pull_data_prog(struct xdp_md *ctx) +{ + __u8 *data_end = (void *)(long)ctx->data_end; + __u8 *data = (void *)(long)ctx->data; + __u8 *val_p; + int err; + + if (data_len != data_end - data) + return XDP_DROP; + + err = bpf_xdp_pull_data(ctx, pull_len); + if (err) + return XDP_DROP; + + val_p = (void *)(long)ctx->data + 1024; + if (val_p + 1 > (void *)(long)ctx->data_end) + return XDP_DROP; + + if (*val_p != 0xbb) + return XDP_DROP; + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c new file mode 100644 index 000000000000..19180a455f40 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +#define CLOCK_MONOTONIC 1 + +int preempt_count; +int in_interrupt; +int in_interrupt_cb; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer) +{ + preempt_count = get_preempt_count(); + in_interrupt_cb = bpf_in_interrupt(); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test_timer_interrupt) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&array, &key); + if (!timer) + return 0; + + in_interrupt = bpf_in_interrupt(); + bpf_timer_init(timer, &array, CLOCK_MONOTONIC); + bpf_timer_set_callback(timer, timer_in_interrupt); + bpf_timer_start(timer, 0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c index c435a3a8328a..d460732e2023 100644 --- a/tools/testing/selftests/bpf/progs/tracing_struct.c +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; +union bpf_testmod_union_arg_1 { + char a; + short b; + struct bpf_testmod_struct_arg_1 arg; +}; + +union bpf_testmod_union_arg_2 { + int a; + long b; + struct bpf_testmod_struct_arg_2 arg; +}; + long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; __u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; @@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; long t5_ret; int t6; +long ut1_a_a, ut1_b, ut1_c; +long ut2_a, ut2_b_a, ut2_b_b; + SEC("fentry/bpf_testmod_test_struct_arg_1") int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) { @@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a) return 0; } +SEC("fexit/bpf_testmod_test_union_arg_1") +int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c) +{ + ut1_a_a = a.arg.a; + ut1_b = b; + ut1_c = c; + return 0; +} + +SEC("fexit/bpf_testmod_test_union_arg_2") +int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b) +{ + ut2_a = a; + ut2_b_a = b.arg.a; + ut2_b_b = b.arg.b; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 044a6d78923e..3d5f30c29ae3 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -97,6 +97,12 @@ int bench_trigger_kprobe_multi(void *ctx) return 0; } +SEC("?kprobe.multi/bpf_get_numa_node_id") +int bench_kprobe_multi_empty(void *ctx) +{ + return 0; +} + SEC("?kretprobe.multi/bpf_get_numa_node_id") int bench_trigger_kretprobe_multi(void *ctx) { @@ -104,6 +110,12 @@ int bench_trigger_kretprobe_multi(void *ctx) return 0; } +SEC("?kretprobe.multi/bpf_get_numa_node_id") +int bench_kretprobe_multi_empty(void *ctx) +{ + return 0; +} + SEC("?fentry/bpf_get_numa_node_id") int bench_trigger_fentry(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c index 8a4fa6c7ef59..e08c31669e5a 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c @@ -7,8 +7,8 @@ struct pt_regs regs; char _license[] SEC("license") = "GPL"; -SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger") -int uretprobe(struct pt_regs *ctx) +SEC("uprobe") +int probe(struct pt_regs *ctx) { __builtin_memcpy(®s, ctx, sizeof(regs)); return 0; diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c index 0d7f1a7db2e2..915d38591bf6 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> #include <string.h> struct pt_regs regs; @@ -8,10 +10,64 @@ struct pt_regs regs; char _license[] SEC("license") = "GPL"; int executed = 0; +int pid; + +SEC("uprobe") +int BPF_UPROBE(test_uprobe) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uretprobe") +int BPF_URETPROBE(test_uretprobe) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uprobe.multi") +int test_uprobe_multi(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} SEC("uretprobe.multi") -int test(struct pt_regs *regs) +int test_uretprobe_multi(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uprobe.session") +int test_uprobe_session(struct pt_regs *ctx) { - executed = 1; + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("usdt") +int test_usdt(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; return 0; } diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c index 9fdcf396b8f4..a2951e2f1711 100644 --- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c +++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c @@ -26,8 +26,8 @@ int usdt_len; SEC("uprobe//proc/self/exe:target_1") int BPF_UPROBE(uprobe_1) { - /* target_1 is recursive wit depth of 2, so we capture two separate - * stack traces, depending on which occurence it is + /* target_1 is recursive with depth of 2, so we capture two separate + * stack traces, depending on which occurrence it is */ static bool recur = false; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 9dbdf123542d..f19e15400b3e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -240,6 +240,7 @@ int big_alloc2(void *ctx) return 5; bpf_arena_free_pages(&arena, (void __arena *)pg, 2); page[i] = NULL; + barrier(); page[i + 1] = NULL; cond_break; } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 87a2c60d86e6..0a72e0228ea9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -926,7 +926,7 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for non const xor src dst") __success __log_level(2) -__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") __naked void non_const_xor_src_dst(void) { asm volatile (" \ @@ -947,7 +947,7 @@ __naked void non_const_xor_src_dst(void) SEC("socket") __description("bounds check for non const or src dst") __success __log_level(2) -__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") __naked void non_const_or_src_dst(void) { asm volatile (" \ @@ -968,7 +968,7 @@ __naked void non_const_or_src_dst(void) SEC("socket") __description("bounds check for non const mul regs") __success __log_level(2) -__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))") +__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))") __naked void non_const_mul_regs(void) { asm volatile (" \ @@ -1241,7 +1241,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("multiply mixed sign bounds. test 1") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") __naked void mult_mixed0_sign(void) { asm volatile ( @@ -1264,7 +1264,7 @@ __naked void mult_mixed0_sign(void) SEC("tc") __description("multiply mixed sign bounds. test 2") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)") __naked void mult_mixed1_sign(void) { asm volatile ( @@ -1287,7 +1287,7 @@ __naked void mult_mixed1_sign(void) SEC("tc") __description("multiply negative bounds") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") __naked void mult_sign_bounds(void) { asm volatile ( @@ -1311,7 +1311,7 @@ __naked void mult_sign_bounds(void) SEC("tc") __description("multiply bounds that don't cross signed boundary") __success __log_level(2) -__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") +__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") __naked void mult_no_sign_crossing(void) { asm volatile ( @@ -1331,7 +1331,7 @@ __naked void mult_no_sign_crossing(void) SEC("tc") __description("multiplication overflow, result in unbounded reg. test 1") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__msg("r6 *= r7 {{.*}}; R6=scalar()") __naked void mult_unsign_ovf(void) { asm volatile ( @@ -1353,7 +1353,7 @@ __naked void mult_unsign_ovf(void) SEC("tc") __description("multiplication overflow, result in unbounded reg. test 2") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__msg("r6 *= r7 {{.*}}; R6=scalar()") __naked void mult_sign_ovf(void) { asm volatile ( @@ -1376,7 +1376,7 @@ __naked void mult_sign_ovf(void) SEC("socket") __description("64-bit addition, all outcomes overflow") __success __log_level(2) -__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") __retval(0) __naked void add64_full_overflow(void) { @@ -1396,7 +1396,7 @@ __naked void add64_full_overflow(void) SEC("socket") __description("64-bit addition, partial overflow, result in unbounded reg") __success __log_level(2) -__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()") __retval(0) __naked void add64_partial_overflow(void) { @@ -1416,7 +1416,7 @@ __naked void add64_partial_overflow(void) SEC("socket") __description("32-bit addition overflow, all outcomes overflow") __success __log_level(2) -__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") __retval(0) __naked void add32_full_overflow(void) { @@ -1436,7 +1436,7 @@ __naked void add32_full_overflow(void) SEC("socket") __description("32-bit addition, partial overflow, result in unbounded u32 bounds") __success __log_level(2) -__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") __retval(0) __naked void add32_partial_overflow(void) { @@ -1456,7 +1456,7 @@ __naked void add32_partial_overflow(void) SEC("socket") __description("64-bit subtraction, all outcomes underflow") __success __log_level(2) -__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)") __retval(0) __naked void sub64_full_overflow(void) { @@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void) SEC("socket") __description("64-bit subtraction, partial overflow, result in unbounded reg") __success __log_level(2) -__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()") __retval(0) __naked void sub64_partial_overflow(void) { @@ -1496,7 +1496,7 @@ __naked void sub64_partial_overflow(void) SEC("socket") __description("32-bit subtraction overflow, all outcomes underflow") __success __log_level(2) -__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") __retval(0) __naked void sub32_full_overflow(void) { @@ -1517,7 +1517,7 @@ __naked void sub32_full_overflow(void) SEC("socket") __description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") __success __log_level(2) -__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") __retval(0) __naked void sub32_partial_overflow(void) { @@ -1617,7 +1617,7 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, positive overlap") __success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) -__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") +__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") __retval(0) __naked void bounds_deduct_positive_overlap(void) { @@ -1650,7 +1650,7 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, two overlaps") __failure __flag(BPF_F_TEST_REG_INVARIANTS) -__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") __msg("frame pointer is read only") __naked void bounds_deduct_two_overlaps(void) { @@ -1668,4 +1668,45 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("socket") +__description("dead jne branch due to disagreeing tnums") +__success __log_level(2) +__naked void jne_disagreeing_tnums(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 = w0; \ + r0 >>= 30; \ + r0 <<= 30; \ + r1 = r0; \ + r1 += 1024; \ + if r1 != r0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("dead jeq branch due to disagreeing tnums") +__success __log_level(2) +__naked void jeq_disagreeing_tnums(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 = w0; \ + r0 >>= 30; \ + r0 <<= 30; \ + r1 = r0; \ + r1 += 1024; \ + if r1 == r0 goto +1; \ + exit; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index c258b0722e04..fb4fa465d67c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -660,19 +660,24 @@ __naked void may_goto_interaction_x86_64(void) SEC("raw_tp") __arch_arm64 -__log_level(4) __msg("stack depth 16") -/* may_goto counter at -16 */ -__xlated("0: *(u64 *)(r10 -16) =") -__xlated("1: r1 = 1") -__xlated("2: call bpf_get_smp_processor_id") +__log_level(4) __msg("stack depth 24") +/* may_goto counter at -24 */ +__xlated("0: *(u64 *)(r10 -24) =") +/* may_goto timestamp at -16 */ +__xlated("1: *(u64 *)(r10 -16) =") +__xlated("2: r1 = 1") +__xlated("3: call bpf_get_smp_processor_id") /* may_goto expansion starts */ -__xlated("3: r11 = *(u64 *)(r10 -16)") -__xlated("4: if r11 == 0x0 goto pc+3") -__xlated("5: r11 -= 1") -__xlated("6: *(u64 *)(r10 -16) = r11") +__xlated("4: r11 = *(u64 *)(r10 -24)") +__xlated("5: if r11 == 0x0 goto pc+6") +__xlated("6: r11 -= 1") +__xlated("7: if r11 != 0x0 goto pc+2") +__xlated("8: r11 = -24") +__xlated("9: call unknown") +__xlated("10: *(u64 *)(r10 -24) = r11") /* may_goto expansion ends */ -__xlated("7: *(u64 *)(r10 -8) = r1") -__xlated("8: exit") +__xlated("11: *(u64 *)(r10 -8) = r1") +__xlated("12: exit") __success __naked void may_goto_interaction_arm64(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index 424463094760..5ebf7d9bcc55 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -5,8 +5,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) - SEC("tc") __description("context stores via BPF_ATOMIC") __failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed") @@ -264,4 +262,34 @@ narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); unaligned_access("flow_dissector", __sk_buff, data); unaligned_access("netfilter", bpf_nf_ctx, skb); +#define padding_access(type, ctx, prev_field, sz) \ + SEC(type) \ + __description("access on " #ctx " padding after " #prev_field) \ + __naked void padding_ctx_access_##ctx(void) \ + { \ + asm volatile (" \ + r1 = *(u%[size] *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(size, sz * 8), \ + __imm_const(off, offsetofend(struct ctx, prev_field)) \ + : __clobber_all); \ + } + +__failure __msg("invalid bpf_context access") +padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4); + +__success +padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2); + +__failure __msg("invalid bpf_context access") +padding_access("tc", __sk_buff, tstamp_type, 2); + +__failure __msg("invalid bpf_context access") +padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2); + +__failure __msg("invalid bpf_context access") +padding_access("sk_reuseport", sk_reuseport_md, hash, 4); + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 181da86ba5f0..6630a92b1b47 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -215,7 +215,7 @@ __weak int subprog_untrusted(const volatile struct task_struct *restrict task __ SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()") __msg("Func#1 ('subprog_untrusted') is global and assumed valid.") __msg("Validating subprog_untrusted() func#1...") __msg(": R1=untrusted_ptr_task_struct") @@ -278,7 +278,7 @@ __weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()") __msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.") __msg("Validating subprog_void_untrusted() func#1...") __msg(": R1=rdonly_untrusted_mem(sz=0)") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 52edee41caf6..c8494b682c31 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_arena_common.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ @@ -10,6 +11,12 @@ defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); +} arena SEC(".maps"); + SEC("socket") __description("LDSX, S8") __success __success_unpriv __retval(-2) @@ -65,7 +72,7 @@ __naked void ldsx_s32(void) SEC("socket") __description("LDSX, S8 range checking, privileged") __log_level(2) __success __retval(1) -__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)") +__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)") __naked void ldsx_s8_range_priv(void) { asm volatile ( @@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void) : __clobber_all); } +SEC("syscall") +__description("Arena LDSX Disasm") +__success +__arch_x86_64 +__jited("movslq 0x10(%rax,%r12), %r14") +__jited("movswq 0x18(%rax,%r12), %r14") +__jited("movsbq 0x20(%rax,%r12), %r14") +__jited("movslq 0x10(%rdi,%r12), %r15") +__jited("movswq 0x18(%rdi,%r12), %r15") +__jited("movsbq 0x20(%rdi,%r12), %r15") +__arch_arm64 +__jited("add x11, x7, x28") +__jited("ldrsw x21, [x11, #0x10]") +__jited("add x11, x7, x28") +__jited("ldrsh x21, [x11, #0x18]") +__jited("add x11, x7, x28") +__jited("ldrsb x21, [x11, #0x20]") +__jited("add x11, x0, x28") +__jited("ldrsw x22, [x11, #0x10]") +__jited("add x11, x0, x28") +__jited("ldrsh x22, [x11, #0x18]") +__jited("add x11, x0, x28") +__jited("ldrsb x22, [x11, #0x20]") +__naked void arena_ldsx_disasm(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = r0;" + "r8 = *(s32 *)(r0 + 16);" + "r8 = *(s16 *)(r0 + 24);" + "r8 = *(s8 *)(r0 + 32);" + "r9 = *(s32 *)(r1 + 16);" + "r9 = *(s16 *)(r1 + 24);" + "r9 = *(s8 *)(r1 + 32);" + "r0 = 0;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX Exception") +__success __retval(0) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_exception(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r0 = 0xdeadbeef;" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fe;" + "*(u64 *)(r0 + 0) = r1;" + "r0 = *(s8 *)(r0 + 0);" + "exit;" + : + : __imm_addr(arena) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S8") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s8(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s8 *)(r0 + 0);" +#else + "r0 = *(s8 *)(r0 + 7);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S16") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s16(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fffe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s16 *)(r0 + 0);" +#else + "r0 = *(s16 *)(r0 + 6);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S32") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s32(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0xfffffffe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s32 *)(r0 + 0);" +#else + "r0 = *(s32 *)(r0 + 4);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +/* to retain debug info for BTF generation */ +void kfunc_root(void) +{ + bpf_arena_alloc_pages(0, 0, 0, 0, 0); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c new file mode 100644 index 000000000000..c0e808509268 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long long); +} map SEC(".maps"); + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 2 +written -8") +__msg("(0) frame 0 insn 1 +live -24") +__msg("(0) frame 0 insn 1 +written -8") +__msg("(0) frame 0 insn 0 +live -8,-24") +__msg("(0) frame 0 insn 0 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void simple_read_simple_write(void) +{ + asm volatile ( + "r1 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 24);" + "*(u64 *)(r10 - 8) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 1 +live -8") +__not_msg("(0) frame 0 insn 1 +written") +__msg("(0) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 1 +live -16") +__msg("(0) frame 0 insn 1 +written -32") +__msg("(0) live stack update done in 2 iterations") +__naked void read_write_join(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 > 42 goto 1f;" + "r0 = *(u64 *)(r10 - 8);" + "*(u64 *)(r10 - 32) = r0;" + "*(u64 *)(r10 - 40) = r0;" + "exit;" +"1:" + "r0 = *(u64 *)(r10 - 16);" + "*(u64 *)(r10 - 32) = r0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("2: (25) if r0 > 0x2a goto pc+1") +__msg("7: (95) exit") +__msg("(0) frame 0 insn 2 +written -16") +__msg("(0) live stack update done in 2 iterations") +__msg("7: (95) exit") +__not_msg("(0) frame 0 insn 2") +__msg("(0) live stack update done in 1 iterations") +__naked void must_write_not_same_slot(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = -8;" + "if r0 > 42 goto 1f;" + "r1 = -16;" +"1:" + "r2 = r10;" + "r2 += r1;" + "*(u64 *)(r2 + 0) = r0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 0 +written -8,-16") +__msg("(0) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 0 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void must_write_not_same_type(void) +{ + asm volatile ( + "*(u64*)(r10 - 8) = 0;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 != 0 goto 1f;" + "r0 = r10;" + "r0 += -16;" +"1:" + "*(u64 *)(r0 + 0) = 42;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_map_lookup_elem), + __imm_addr(map) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(2,4) frame 0 insn 4 +written -8") +__msg("(2,4) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 2 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void caller_stack_write(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "call write_first_param;" + "exit;" + ::: __clobber_all); +} + +static __used __naked void write_first_param(void) +{ + asm volatile ( + "*(u64 *)(r1 + 0) = 7;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +/* caller_stack_read() function */ +__msg("2: .12345.... (85) call pc+4") +__msg("5: .12345.... (85) call pc+1") +__msg("6: 0......... (95) exit") +/* read_first_param() function */ +__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)") +__msg("8: 0......... (95) exit") +/* update for callsite at (2) */ +__msg("(2,7) frame 0 insn 7 +live -8") +__msg("(2,7) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 2 +live -8") +__msg("(0) live stack update done in 2 iterations") +/* update for callsite at (5) */ +__msg("(5,7) frame 0 insn 7 +live -16") +__msg("(5,7) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 5 +live -16") +__msg("(0) live stack update done in 2 iterations") +__naked void caller_stack_read(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "call read_first_param;" + "r1 = r10;" + "r1 += -16;" + "call read_first_param;" + "exit;" + ::: __clobber_all); +} + +static __used __naked void read_first_param(void) +{ + asm volatile ( + "r0 = *(u64 *)(r1 + 0);" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__flag(BPF_F_TEST_STATE_FREQ) +__log_level(2) +/* read_first_param2() function */ +__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)") +__msg("10: .......... (b7) r0 = 0") +__msg("11: 0......... (05) goto pc+0") +__msg("12: 0......... (95) exit") +/* + * The purpose of the test is to check that checkpoint in + * read_first_param2() stops path traversal. This will only happen if + * verifier understands that fp[0]-8 at insn (12) is not alive. + */ +__msg("12: safe") +__msg("processed 20 insns") +__naked void caller_stack_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 == 42 goto 1f;" + "r0 = %[map] ll;" +"1:" + "*(u64 *)(r10 - 8) = r0;" + "r1 = r10;" + "r1 += -8;" + /* + * fp[0]-8 is either pointer to map or a scalar, + * preventing state pruning at checkpoint created for call. + */ + "call read_first_param2;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_addr(map) + : __clobber_all); +} + +static __used __naked void read_first_param2(void) +{ + asm volatile ( + "r0 = *(u64 *)(r1 + 0);" + "r0 = 0;" + /* + * Checkpoint at goto +0 should fire, + * as caller stack fp[0]-8 is not alive at this point. + */ + "goto +0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__flag(BPF_F_TEST_STATE_FREQ) +__failure +__msg("R1 type=scalar expected=map_ptr") +__naked void caller_stack_pruning_callback(void) +{ + asm volatile ( + "r0 = %[map] ll;" + "*(u64 *)(r10 - 8) = r0;" + "r1 = 2;" + "r2 = loop_cb ll;" + "r3 = r10;" + "r3 += -8;" + "r4 = 0;" + /* + * fp[0]-8 is either pointer to map or a scalar, + * preventing state pruning at checkpoint created for call. + */ + "call %[bpf_loop];" + "r0 = 42;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_loop), + __imm_addr(map) + : __clobber_all); +} + +static __used __naked void loop_cb(void) +{ + asm volatile ( + /* + * Checkpoint at function entry should not fire, as caller + * stack fp[0]-8 is alive at this point. + */ + "r6 = r2;" + "r1 = *(u64 *)(r6 + 0);" + "*(u64*)(r10 - 8) = 7;" + "r2 = r10;" + "r2 += -8;" + "call %[bpf_map_lookup_elem];" + /* + * This should stop verifier on a second loop iteration, + * but only if verifier correctly maintains that fp[0]-8 + * is still alive. + */ + "*(u64 *)(r6 + 0) = 0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Because of a bug in verifier.c:compute_postorder() + * the program below overflowed traversal queue in that function. + */ +SEC("socket") +__naked void syzbot_postorder_bug1(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 != 0 goto -1;" + "exit;" + ::: __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index e07b43b78fd2..fbdde80e7b90 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -283,4 +283,25 @@ exit_%=: \ : __clobber_all); } +/* + * This test case triggered a bug in verifier.c:maybe_exit_scc(). + * Speculative execution path reaches stack access instruction, + * stops and triggers maybe_exit_scc() w/o accompanying maybe_enter_scc() call. + */ +SEC("socket") +__arch_x86_64 +__caps_unpriv(CAP_BPF) +__naked void maybe_exit_scc_bug1(void) +{ + asm volatile ( + "r0 = 100;" +"1:" + /* Speculative execution path reaches and stops here. */ + "*(u64 *)(r10 - 512) = r0;" + /* Condition is always false, but verifier speculatively executes the true branch. */ + "if r0 <= 0x0 goto 1b;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c index 11a079145966..e2767d27d8aa 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c @@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void) : __clobber_all); } +/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing + * into this array is valid. The opts field is now at offset 33. + */ SEC("socket") __description("bpf_map_ptr: read non-existent field rejected") __failure -__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4") +__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4") __failure_unpriv __msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") __flag(BPF_F_ANY_ALIGNMENT) @@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void) asm volatile (" \ r6 = 0; \ r1 = %[map_array_48b] ll; \ - r6 = *(u32*)(r1 + 1); \ + r6 = *(u32*)(r1 + 33); \ r0 = 1; \ exit; \ " : diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c index 3966d827f288..6d1edaef9213 100644 --- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -9,6 +9,8 @@ SEC("raw_tp") __description("may_goto 0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -27,6 +29,8 @@ __naked void may_goto_simple(void) SEC("raw_tp") __description("batch 2 of may_goto 0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -47,6 +51,8 @@ __naked void may_goto_batch_0(void) SEC("raw_tp") __description("may_goto batch with offsets 2/1/0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -69,8 +75,10 @@ __naked void may_goto_batch_1(void) } SEC("raw_tp") -__description("may_goto batch with offsets 2/0 - x86_64") +__description("may_goto batch with offsets 2/0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: *(u64 *)(r10 -16) = 65535") __xlated("1: *(u64 *)(r10 -8) = 0") __xlated("2: r11 = *(u64 *)(r10 -16)") @@ -84,33 +92,7 @@ __xlated("9: r0 = 1") __xlated("10: r0 = 2") __xlated("11: exit") __success -__naked void may_goto_batch_2_x86_64(void) -{ - asm volatile ( - ".8byte %[may_goto1];" - ".8byte %[may_goto3];" - "r0 = 1;" - "r0 = 2;" - "exit;" - : - : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), - __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) - : __clobber_all); -} - -SEC("raw_tp") -__description("may_goto batch with offsets 2/0 - arm64") -__arch_arm64 -__xlated("0: *(u64 *)(r10 -8) = 8388608") -__xlated("1: r11 = *(u64 *)(r10 -8)") -__xlated("2: if r11 == 0x0 goto pc+3") -__xlated("3: r11 -= 1") -__xlated("4: *(u64 *)(r10 -8) = r11") -__xlated("5: r0 = 1") -__xlated("6: r0 = 2") -__xlated("7: exit") -__success -__naked void may_goto_batch_2_arm64(void) +__naked void may_goto_batch_2(void) { asm volatile ( ".8byte %[may_goto1];" diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c new file mode 100644 index 000000000000..7145fe3351d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_mul.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Nandakumar Edamana */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +/* Intended to test the abstract multiplication technique(s) used by + * the verifier. Using assembly to avoid compiler optimizations. + */ +SEC("fentry/bpf_fentry_test1") +void BPF_PROG(mul_precise, int x) +{ + /* First, force the verifier to be uncertain about the value: + * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1; + * + * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}. + * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision + * would cause the following to fail, because the required return value + * is 0: + * return (a * 0x3) & 0x4); + */ + asm volatile ("\ + call %[bpf_get_prandom_u32];\ + r0 &= 0x2;\ + r0 |= 0x1;\ + r0 *= 0x3;\ + r0 &= 0x4;\ + if r0 != 0 goto l0_%=;\ + r0 = 0;\ + goto l1_%=;\ +l0_%=:\ + r0 = 1;\ +l1_%=:\ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 73fee2aec698..1fe090cd6744 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -144,21 +144,21 @@ SEC("?raw_tp") __success __log_level(2) /* * Without the bug fix there will be no history between "last_idx 3 first_idx 3" - * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor + * and "parent state regs=" lines. "R0=6" parts are here to help anchor * expected log messages to the one specific mark_chain_precision operation. * * This is quite fragile: if verifier checkpointing heuristic changes, this * might need adjusting. */ -__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("2: (07) r0 += 1 ; R0=6") __msg("3: (35) if r0 >= 0xa goto pc+1") __msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") __msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") __msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") -__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") -__msg("3: R0_w=6") +__msg("mark_precise: frame0: parent state regs= stack=: R0=P4") +__msg("3: R0=6") __naked int state_loop_first_last_equal(void) { asm volatile ( @@ -233,8 +233,8 @@ __naked void bpf_cond_op_not_r10(void) SEC("lsm.s/socket_connect") __success __log_level(2) -__msg("0: (b7) r0 = 1 ; R0_w=1") -__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("0: (b7) r0 = 1 ; R0=1") +__msg("1: (84) w0 = -w0 ; R0=0xffffffff") __msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") __msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") @@ -268,8 +268,8 @@ __naked int bpf_neg_3(void) SEC("lsm.s/socket_connect") __success __log_level(2) -__msg("0: (b7) r0 = 1 ; R0_w=1") -__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("0: (b7) r0 = 1 ; R0=1") +__msg("1: (87) r0 = -r0 ; R0=-1") __msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") __msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 7c5e5e6d10eb..c0ce690ddb68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -349,11 +349,11 @@ __naked void precision_two_ids(void) SEC("socket") __success __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) -/* check thar r0 and r6 have different IDs after 'if', +/* check that r0 and r6 have different IDs after 'if', * collect_linked_regs() can't tie more than 6 registers for a single insn. */ __msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") -__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2") +__msg("9: (bf) r6 = r6 ; R6=scalar(id=2") /* check that r{0-5} are marked precise after 'if' */ __msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0") __msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:") @@ -779,12 +779,12 @@ __success __retval(0) /* Check that verifier believes r1/r0 are zero at exit */ __log_level(2) -__msg("4: (77) r1 >>= 32 ; R1_w=0") -__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("4: (77) r1 >>= 32 ; R1=0") +__msg("5: (bf) r0 = r1 ; R0=0 R1=0") __msg("6: (95) exit") __msg("from 3 to 4") -__msg("4: (77) r1 >>= 32 ; R1_w=0") -__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("4: (77) r1 >>= 32 ; R1=0") +__msg("5: (bf) r0 = r1 ; R0=0 R1=0") __msg("6: (95) exit") /* Verify that statements to randomize upper half of r1 had not been * generated. diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index 0d5e56dffabb..2b4610b53382 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -1,14 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/sock.c */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) - struct { __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); __uint(max_entries, 1); @@ -1073,6 +1069,48 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) } __noinline +long xdp_pull_data2(struct xdp_md *x, __u32 len) +{ + return bpf_xdp_pull_data(x, len); +} + +__noinline +long xdp_pull_data1(struct xdp_md *x, __u32 len) +{ + return xdp_pull_data2(x, len); +} + +/* global function calls bpf_xdp_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("xdp") +__failure __msg("invalid mem access") +int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x) +{ + int *p = (void *)(long)x->data; + + if ((void *)(p + 1) > (void *)(long)x->data_end) + return XDP_DROP; + xdp_pull_data1(x, 0); + *p = 42; /* this is unsafe */ + return XDP_PASS; +} + +/* XDP packet changing kfunc calls invalidate packet pointers */ +SEC("xdp") +__failure __msg("invalid mem access") +int invalidate_xdp_pkt_pointers(struct xdp_md *x) +{ + int *p = (void *)(long)x->data; + + if ((void *)(p + 1) > (void *)(long)x->data_end) + return XDP_DROP; + bpf_xdp_pull_data(x, 0); + *p = 42; /* this is unsafe */ + return XDP_PASS; +} + +__noinline int tail_call(struct __sk_buff *sk) { bpf_tail_call_static(sk, &jmp_table, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 1e5a511e8494..7a13dbd794b2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -506,17 +506,17 @@ SEC("raw_tp") __log_level(2) __success /* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */ -__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0") +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0") /* but fp-16 is spilled IMPRECISE zero const reg */ -__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0") /* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register * precise immediately; if necessary, it will be marked precise later */ -__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0") +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0") /* similarly, when R2 is assigned from spilled register, it is initially * imprecise, but will be marked precise later once it is used in precise context */ -__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0") __msg("11: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") @@ -598,7 +598,7 @@ __log_level(2) __success /* fp-4 is STACK_ZERO */ __msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????") -__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????") +__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????") __msg("5: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)") @@ -640,25 +640,25 @@ SEC("raw_tp") __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) __success /* make sure fp-8 is IMPRECISE fake register spill */ -__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1") /* and fp-16 is spilled IMPRECISE const reg */ -__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1") /* validate load from fp-8, which was initialized using BPF_ST_MEM */ -__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1") __msg("9: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") /* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") __msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") -__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") /* validate load from fp-16, which was initialized using BPF_STX_MEM */ -__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1") __msg("13: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") @@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") __msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") /* now both fp-8 and fp-16 are precise, very good */ -__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") -__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") __naked void stack_load_preserves_const_precision(void) { asm volatile ( @@ -719,22 +719,22 @@ __success /* make sure fp-8 is 32-bit FAKE subregister spill */ __msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") /* but fp-16 is spilled IMPRECISE zero const reg */ -__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1") /* validate load from fp-8, which was initialized using BPF_ST_MEM */ -__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1") __msg("9: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") __msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") -__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") /* validate load from fp-16, which was initialized using BPF_STX_MEM */ -__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1") __msg("13: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") @@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2 __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") __msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") -__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") -__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") __naked void stack_load_preserves_const_precision_subreg(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 9d415f7ce599..ac3e418c2a96 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") __msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") __msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit") __msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10") -__msg("7: R0_w=scalar") +__msg("7: R0=scalar") __naked int fp_precise_subprog_result(void) { asm volatile ( @@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1") * anyways, at which point we'll break precision chain */ __msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10") -__msg("7: R0_w=scalar") +__msg("7: R0=scalar") __naked int sneaky_fp_precise_subprog_result(void) { asm volatile ( @@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") __msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") __msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1") __msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") __msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index 1d36d01b746e..f345466bca68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void) } /* Similar to the previous test, but this time also perform a read from the - * address written to with a variable offset. The read is allowed, showing that, - * after a variable-offset write, a priviledged program can read the slots that + * address written to with a variable offet. The read is allowed, showing that, + * after a variable-offset write, a privileged program can read the slots that * were in the range of that write (even if the verifier doesn't actually know if * the slot being read was really written to or not. * @@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void) SEC("socket") __description("variable-offset stack write clobbers spilled regs") __failure -/* In the priviledged case, dereferencing a spilled-and-then-filled +/* In the privileged case, dereferencing a spilled-and-then-filled * register is rejected because the previous variable offset stack * write might have overwritten the spilled pointer (i.e. we lose track * of the spilled register when we analyze the write). diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile index d4e50c4509c9..63c4d3f6a12f 100644 --- a/tools/testing/selftests/bpf/test_kmods/Makefile +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -8,7 +8,7 @@ Q = @ endif MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko $(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c new file mode 100644 index 000000000000..769206fc70e4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/prandom.h> +#include <asm/rqspinlock.h> +#include <linux/perf_event.h> +#include <linux/kthread.h> +#include <linux/atomic.h> +#include <linux/slab.h> + +static struct perf_event_attr hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, + .sample_period = 100000, +}; + +static rqspinlock_t lock_a; +static rqspinlock_t lock_b; + +static struct perf_event **rqsl_evts; +static int rqsl_nevts; + +static bool test_ab = false; +module_param(test_ab, bool, 0644); +MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations"); + +static struct task_struct **rqsl_threads; +static int rqsl_nthreads; +static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0); + +static int pause = 0; + +static bool nmi_locks_a(int cpu) +{ + return (cpu & 1) && test_ab; +} + +static int rqspinlock_worker_fn(void *arg) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (cpu) { + atomic_inc(&rqsl_ready_cpus); + + while (!kthread_should_stop()) { + if (READ_ONCE(pause)) { + msleep(1000); + continue; + } + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_b, flags); + else + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + mdelay(20); + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_b, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + cpu_relax(); + } + return 0; + } + + while (!kthread_should_stop()) { + int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0; + int ready = atomic_read(&rqsl_ready_cpus); + + if (ready == expected && !READ_ONCE(pause)) { + for (int i = 0; i < rqsl_nevts; i++) + perf_event_enable(rqsl_evts[i]); + pr_err("Waiting 5 secs to pause the test\n"); + msleep(1000 * 5); + WRITE_ONCE(pause, 1); + pr_err("Paused the test\n"); + } else { + msleep(1000); + cpu_relax(); + } + } + return 0; +} + +static void nmi_cb(struct perf_event *event, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (!cpu || READ_ONCE(pause)) + return; + + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + else + ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags); + + mdelay(10); + + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags); +} + +static void free_rqsl_threads(void) +{ + int i; + + if (rqsl_threads) { + for_each_online_cpu(i) { + if (rqsl_threads[i]) + kthread_stop(rqsl_threads[i]); + } + kfree(rqsl_threads); + } +} + +static void free_rqsl_evts(void) +{ + int i; + + if (rqsl_evts) { + for (i = 0; i < rqsl_nevts; i++) { + if (rqsl_evts[i]) + perf_event_release_kernel(rqsl_evts[i]); + } + kfree(rqsl_evts); + } +} + +static int bpf_test_rqspinlock_init(void) +{ + int i, ret; + int ncpus = num_online_cpus(); + + pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA"); + + if (ncpus < 3) + return -ENOTSUPP; + + raw_res_spin_lock_init(&lock_a); + raw_res_spin_lock_init(&lock_b); + + rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL); + if (!rqsl_evts) + return -ENOMEM; + rqsl_nevts = ncpus - 1; + + for (i = 1; i < ncpus; i++) { + struct perf_event *e; + + e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL); + if (IS_ERR(e)) { + ret = PTR_ERR(e); + goto err_perf_events; + } + rqsl_evts[i - 1] = e; + } + + rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL); + if (!rqsl_threads) { + ret = -ENOMEM; + goto err_perf_events; + } + rqsl_nthreads = ncpus; + + for_each_online_cpu(i) { + struct task_struct *t; + + t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto err_threads_create; + } + kthread_bind(t, i); + rqsl_threads[i] = t; + wake_up_process(t); + } + return 0; + +err_threads_create: + free_rqsl_threads(); +err_perf_events: + free_rqsl_evts(); + return ret; +} + +module_init(bpf_test_rqspinlock_init); + +static void bpf_test_rqspinlock_exit(void) +{ + free_rqsl_threads(); + free_rqsl_evts(); +} + +module_exit(bpf_test_rqspinlock_exit); + +MODULE_AUTHOR("Kumar Kartikeya Dwivedi"); +MODULE_DESCRIPTION("BPF rqspinlock stress test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index e9e918cdf31f..8074bc5f6f20 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 { long d; }; +union bpf_testmod_union_arg_1 { + char a; + short b; + struct bpf_testmod_struct_arg_1 arg; +}; + +union bpf_testmod_union_arg_2 { + int a; + long b; + struct bpf_testmod_struct_arg_2 arg; +}; + __bpf_hook_start(); noinline int @@ -129,6 +141,20 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, } noinline int +bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c) +{ + bpf_testmod_test_struct_arg_result = a.arg.a + b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b) +{ + bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { bpf_testmod_test_struct_arg_result = a->a; return bpf_testmod_test_struct_arg_result; @@ -218,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr) { } +__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void) +{ + return NULL; +} + +__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) +{ + return NULL; +} + __bpf_kfunc struct bpf_testmod_ctx * bpf_testmod_ctx_create(int *err) { @@ -398,6 +434,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bpf_testmod_struct_arg_3 *struct_arg3; struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26}; + union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} }; + union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} }; int i = 1; while (bpf_testmod_return_ptr(i)) @@ -415,6 +453,9 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, 21, 22, struct_arg5, 27); + (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5); + (void)bpf_testmod_test_union_arg_2(6, union_arg2); + (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL); @@ -501,14 +542,20 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { #ifdef __x86_64__ static int +uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data) +{ + regs->cx = 0x87654321feebdaed; + return 0; +} + +static int uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs, __u64 *data) { regs->ax = 0x12345678deadbeef; - regs->cx = 0x87654321feebdaed; regs->r11 = (u64) -1; - return true; + return 0; } struct testmod_uprobe { @@ -520,6 +567,7 @@ struct testmod_uprobe { static DEFINE_MUTEX(testmod_uprobe_mutex); static struct testmod_uprobe uprobe = { + .consumer.handler = uprobe_handler, .consumer.ret_handler = uprobe_ret_handler, }; @@ -623,6 +671,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED) +BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) @@ -1057,6 +1107,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) return args->a; } +__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id); + BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) @@ -1097,6 +1149,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) @@ -1528,6 +1581,114 @@ static struct bpf_struct_ops testmod_st_ops = { .owner = THIS_MODULE, }; +struct hlist_head multi_st_ops_list; +static DEFINE_SPINLOCK(multi_st_ops_lock); + +static int multi_st_ops_init(struct btf *btf) +{ + spin_lock_init(&multi_st_ops_lock); + INIT_HLIST_HEAD(&multi_st_ops_list); + + return 0; +} + +static int multi_st_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + + hlist_for_each_entry(st_ops, &multi_st_ops_list, node) { + if (st_ops->id == id) + return st_ops; + } + + return NULL; +} + +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + int ret = -1; + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + ret = st_ops->test_1(args); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return ret; +} + +static int multi_st_ops_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops = + (struct bpf_testmod_multi_st_ops *)kdata; + unsigned long flags; + int err = 0; + u32 id; + + if (!st_ops->test_1) + return -EINVAL; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + if (multi_st_ops_find_nolock(id)) { + pr_err("multi_st_ops(id:%d) has already been registered\n", id); + err = -EEXIST; + goto unlock; + } + + st_ops->id = id; + hlist_add_head(&st_ops->node, &multi_st_ops_list); +unlock: + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return err; +} + +static void multi_st_ops_unreg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + u32 id; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + hlist_del(&st_ops->node); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); +} + +static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = { + .test_1 = bpf_testmod_multi_st_ops__test_1, +}; + +struct bpf_struct_ops testmod_multi_st_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = multi_st_ops_init, + .init_member = multi_st_ops_init_member, + .reg = multi_st_ops_reg, + .unreg = multi_st_ops_unreg, + .cfi_stubs = &multi_st_ops_cfi_stubs, + .name = "bpf_testmod_multi_st_ops", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -1550,6 +1711,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); + ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), THIS_MODULE); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h index c9fab51f16e2..f6e492f9d042 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h @@ -116,4 +116,10 @@ struct bpf_testmod_st_ops { struct module *owner; }; +struct bpf_testmod_multi_st_ops { + int (*test_1)(struct st_ops_args *args); + struct hlist_node node; + int id; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index b58817938deb..4df6fa6a92cb 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym; void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym; void bpf_kfunc_trusted_num_test(int *ptr) __ksym; void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym; +struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym; +int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym; + +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym; #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index 4694422aa76c..88e4aeab21b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -74,7 +74,7 @@ int main(int argc, char **argv) /* Let's try detach it before it was ever attached */ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2); - if (ret != -1 || errno != ENOENT) { + if (ret != -ENOENT) { printf("bpf_prog_detach2 not attached should fail: %m\n"); return 1; } diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 78423cf89e01..74ecc281bb8c 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -2,7 +2,6 @@ /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/capability.h> #include <stdlib.h> -#include <regex.h> #include <test_progs.h> #include <bpf/btf.h> @@ -20,10 +19,12 @@ #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg=" #define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated=" #define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv" #define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv" #define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv=" +#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv=" #define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" @@ -38,6 +39,10 @@ #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" #define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" +#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr=" +#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv=" +#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout=" +#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xbadcafe @@ -61,24 +66,14 @@ enum load_mode { NO_JITED = 1 << 1, }; -struct expect_msg { - const char *substr; /* substring match */ - regex_t regex; - bool is_regex; - bool on_next_line; -}; - -struct expected_msgs { - struct expect_msg *patterns; - size_t cnt; -}; - struct test_subspec { char *name; bool expect_failure; struct expected_msgs expect_msgs; struct expected_msgs expect_xlated; struct expected_msgs jited; + struct expected_msgs stderr; + struct expected_msgs stdout; int retval; bool execute; __u64 caps; @@ -139,6 +134,10 @@ static void free_test_spec(struct test_spec *spec) free_msgs(&spec->unpriv.expect_xlated); free_msgs(&spec->priv.jited); free_msgs(&spec->unpriv.jited); + free_msgs(&spec->unpriv.stderr); + free_msgs(&spec->priv.stderr); + free_msgs(&spec->unpriv.stdout); + free_msgs(&spec->priv.stdout); free(spec->priv.name); free(spec->unpriv.name); @@ -206,7 +205,8 @@ static int compile_regex(const char *pattern, regex_t *regex) return 0; } -static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs) +static int __push_msg(const char *pattern, bool on_next_line, bool negative, + struct expected_msgs *msgs) { struct expect_msg *msg; void *tmp; @@ -222,6 +222,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms msg = &msgs->patterns[msgs->cnt]; msg->on_next_line = on_next_line; msg->substr = pattern; + msg->negative = negative; msg->is_regex = false; if (strstr(pattern, "{{")) { err = compile_regex(pattern, &msg->regex); @@ -240,16 +241,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to) for (i = 0; i < from->cnt; i++) { msg = &from->patterns[i]; - err = __push_msg(msg->substr, msg->on_next_line, to); + err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to); if (err) return err; } return 0; } -static int push_msg(const char *substr, struct expected_msgs *msgs) +static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs) { - return __push_msg(substr, false, msgs); + return __push_msg(substr, false, negative, msgs); } static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs) @@ -260,7 +261,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp *on_next_line = false; return 0; } - err = __push_msg(regex_str, *on_next_line, msgs); + err = __push_msg(regex_str, *on_next_line, false, msgs); if (err) return err; *on_next_line = true; @@ -374,6 +375,7 @@ enum arch { ARCH_X86_64 = 0x2, ARCH_ARM64 = 0x4, ARCH_RISCV64 = 0x8, + ARCH_S390X = 0x10, }; static int get_current_arch(void) @@ -384,6 +386,8 @@ static int get_current_arch(void) return ARCH_ARM64; #elif defined(__riscv) && __riscv_xlen == 64 return ARCH_RISCV64; +#elif defined(__s390x__) + return ARCH_S390X; #endif return ARCH_UNKNOWN; } @@ -404,6 +408,10 @@ static int parse_test_spec(struct test_loader *tester, bool xlated_on_next_line = true; bool unpriv_jit_on_next_line; bool jit_on_next_line; + bool stderr_on_next_line = true; + bool unpriv_stderr_on_next_line = true; + bool stdout_on_next_line = true; + bool unpriv_stdout_on_next_line = true; bool collect_jit = false; int func_id, i, err = 0; u32 arch_mask = 0; @@ -465,12 +473,22 @@ static int parse_test_spec(struct test_loader *tester, spec->auxiliary = true; spec->mode_mask |= UNPRIV; } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) { - err = push_msg(msg, &spec->priv.expect_msgs); + err = push_msg(msg, false, &spec->priv.expect_msgs); + if (err) + goto cleanup; + spec->mode_mask |= PRIV; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) { + err = push_msg(msg, true, &spec->priv.expect_msgs); if (err) goto cleanup; spec->mode_mask |= PRIV; } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) { - err = push_msg(msg, &spec->unpriv.expect_msgs); + err = push_msg(msg, false, &spec->unpriv.expect_msgs); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) { + err = push_msg(msg, true, &spec->unpriv.expect_msgs); if (err) goto cleanup; spec->mode_mask |= UNPRIV; @@ -565,8 +583,10 @@ static int parse_test_spec(struct test_loader *tester, arch = ARCH_ARM64; } else if (strcmp(val, "RISCV64") == 0) { arch = ARCH_RISCV64; + } else if (strcmp(val, "s390x") == 0) { + arch = ARCH_S390X; } else { - PRINT_FAIL("bad arch spec: '%s'", val); + PRINT_FAIL("bad arch spec: '%s'\n", val); err = -EINVAL; goto cleanup; } @@ -593,6 +613,26 @@ static int parse_test_spec(struct test_loader *tester, err = -EINVAL; goto cleanup; } + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) { + err = push_disasm_msg(msg, &stderr_on_next_line, + &spec->priv.stderr); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) { + err = push_disasm_msg(msg, &unpriv_stderr_on_next_line, + &spec->unpriv.stderr); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) { + err = push_disasm_msg(msg, &stdout_on_next_line, + &spec->priv.stdout); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) { + err = push_disasm_msg(msg, &unpriv_stdout_on_next_line, + &spec->unpriv.stdout); + if (err) + goto cleanup; } } @@ -646,6 +686,10 @@ static int parse_test_spec(struct test_loader *tester, clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated); if (spec->unpriv.jited.cnt == 0) clone_msgs(&spec->priv.jited, &spec->unpriv.jited); + if (spec->unpriv.stderr.cnt == 0) + clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr); + if (spec->unpriv.stdout.cnt == 0) + clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout); } spec->valid = true; @@ -707,44 +751,155 @@ static void emit_jited(const char *jited, bool force) fprintf(stdout, "JITED:\n=============\n%s=============\n", jited); } -static void validate_msgs(char *log_buf, struct expected_msgs *msgs, - void (*emit_fn)(const char *buf, bool force)) +static void emit_stderr(const char *stderr, bool force) { - const char *log = log_buf, *prev_match; + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr); +} + +static void emit_stdout(const char *bpf_stdout, bool force) +{ + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout); +} + +static const char *match_msg(struct expect_msg *msg, const char **log) +{ + const char *match = NULL; regmatch_t reg_match[1]; - int prev_match_line; - int match_line; - int i, j, err; + int err; + + if (!msg->is_regex) { + match = strstr(*log, msg->substr); + if (match) + *log = match + strlen(msg->substr); + } else { + err = regexec(&msg->regex, *log, 1, reg_match, 0); + if (err == 0) { + match = *log + reg_match[0].rm_so; + *log += reg_match[0].rm_eo; + } + } + return match; +} + +static int count_lines(const char *start, const char *end) +{ + const char *tmp; + int n = 0; + + for (tmp = start; tmp < end; ++tmp) + if (*tmp == '\n') + n++; + return n; +} + +struct match { + const char *start; + const char *end; + int line; +}; + +/* + * Positive messages are matched sequentially, each next message + * is looked for starting from the end of a previous matched one. + */ +static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches) +{ + const char *prev_match; + int i, line; - prev_match_line = -1; - match_line = 0; prev_match = log; + line = 0; for (i = 0; i < msgs->cnt; i++) { struct expect_msg *msg = &msgs->patterns[i]; - const char *match = NULL, *pat_status; - bool wrong_line = false; - - if (!msg->is_regex) { - match = strstr(log, msg->substr); - if (match) - log = match + strlen(msg->substr); - } else { - err = regexec(&msg->regex, log, 1, reg_match, 0); - if (err == 0) { - match = log + reg_match[0].rm_so; - log += reg_match[0].rm_eo; + const char *match = NULL; + + if (msg->negative) + continue; + + match = match_msg(msg, &log); + if (match) { + line += count_lines(prev_match, match); + matches[i].start = match; + matches[i].end = log; + matches[i].line = line; + prev_match = match; + } + } +} + +/* + * Each negative messages N located between positive messages P1 and P2 + * is matched in the span P1.end .. P2.start. Consequently, negative messages + * are unordered within the span. + */ +static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches) +{ + const char *start = log, *end, *next, *match; + const char *log_end = log + strlen(log); + int i, j, next_positive; + + for (i = 0; i < msgs->cnt; i++) { + struct expect_msg *msg = &msgs->patterns[i]; + + /* positive message bumps span start */ + if (!msg->negative) { + start = matches[i].end ?: start; + continue; + } + + /* count stride of negative patterns and adjust span end */ + end = log_end; + for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) { + if (!msgs->patterns[next_positive].negative) { + end = matches[next_positive].start; + break; } } - if (match) { - for (; prev_match < match; ++prev_match) - if (*prev_match == '\n') - ++match_line; - wrong_line = msg->on_next_line && prev_match_line >= 0 && - prev_match_line + 1 != match_line; + /* try matching negative messages within identified span */ + for (j = i; j < next_positive; j++) { + next = start; + match = match_msg(msg, &next); + if (match && next <= end) { + matches[j].start = match; + matches[j].end = next; + } } - if (!match || wrong_line) { + /* -1 to account for i++ */ + i = next_positive - 1; + } +} + +void validate_msgs(const char *log_buf, struct expected_msgs *msgs, + void (*emit_fn)(const char *buf, bool force)) +{ + struct match matches[msgs->cnt]; + struct match *prev_match = NULL; + int i, j; + + memset(matches, 0, sizeof(*matches) * msgs->cnt); + match_positive_msgs(log_buf, msgs, matches); + match_negative_msgs(log_buf, msgs, matches); + + for (i = 0; i < msgs->cnt; i++) { + struct expect_msg *msg = &msgs->patterns[i]; + struct match *match = &matches[i]; + const char *pat_status; + bool unexpected; + bool wrong_line; + bool no_match; + + no_match = !msg->negative && !match->start; + wrong_line = !msg->negative && + msg->on_next_line && + prev_match && prev_match->line + 1 != match->line; + unexpected = msg->negative && match->start; + if (no_match || wrong_line || unexpected) { PRINT_FAIL("expect_msg\n"); if (env.verbosity == VERBOSE_NONE) emit_fn(log_buf, true /*force*/); @@ -754,8 +909,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs, pat_status = "MATCHED "; else if (wrong_line) pat_status = "WRONG LINE"; - else + else if (no_match) pat_status = "EXPECTED "; + else + pat_status = "UNEXPECTED"; msg = &msgs->patterns[j]; fprintf(stderr, "%s %s: '%s'\n", pat_status, @@ -765,12 +922,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs, if (wrong_line) { fprintf(stderr, "expecting match at line %d, actual match is at line %d\n", - prev_match_line + 1, match_line); + prev_match->line + 1, match->line); } break; } - prev_match_line = match_line; + if (!msg->negative) + prev_match = match; } } @@ -929,6 +1087,19 @@ out: return err; } +/* Read the bpf stream corresponding to the stream_id */ +static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz) +{ + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + int ret; + + ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts); + ASSERT_GT(ret, 0, "stream read"); + text[ret] = '\0'; + + return ret; +} + /* this function is forced noinline and has short generic name to look better * in test_progs output (in case of a failure) */ @@ -1083,7 +1254,7 @@ void run_subtest(struct test_loader *tester, link = bpf_map__attach_struct_ops(map); if (!link) { PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n", - bpf_map__name(map), err); + bpf_map__name(map), -errno); goto tobj_cleanup; } links[links_cnt++] = link; @@ -1103,6 +1274,31 @@ void run_subtest(struct test_loader *tester, PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } + + if (subspec->stderr.cnt) { + err = get_stream(2, bpf_program__fd(tprog), + tester->log_buf, tester->log_buf_sz); + if (err <= 0) { + PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n", + err, errno); + goto tobj_cleanup; + } + emit_stderr(tester->log_buf, false /*force*/); + validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr); + } + + if (subspec->stdout.cnt) { + err = get_stream(1, bpf_program__fd(tprog), + tester->log_buf, tester->log_buf_sz); + if (err <= 0) { + PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n", + err, errno); + goto tobj_cleanup; + } + emit_stdout(tester->log_buf, false /*force*/); + validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout); + } + /* redo bpf_map__attach_struct_ops for each test */ while (links_cnt > 0) bpf_link__destroy(links[--links_cnt]); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 309d9d4a8ace..02a85dda30e6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -14,12 +14,14 @@ #include <netinet/in.h> #include <sys/select.h> #include <sys/socket.h> +#include <linux/keyctl.h> #include <sys/un.h> #include <bpf/btf.h> #include <time.h> #include "json_writer.h" #include "network_helpers.h" +#include "verification_cert.h" /* backtrace() and backtrace_symbols_fd() are glibc specific, * use header file when glibc is available and provide stub @@ -1928,6 +1930,13 @@ static void free_test_states(void) } } +static __u32 register_session_key(const char *key_data, size_t key_data_size) +{ + return syscall(__NR_add_key, "asymmetric", "libbpf_session_key", + (const void *)key_data, key_data_size, + KEY_SPEC_SESSION_KEYRING); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -1961,6 +1970,10 @@ int main(int argc, char **argv) /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); + err = register_session_key((const char *)test_progs_verification_cert, + test_progs_verification_cert_len); + if (err < 0) + return err; traffic_monitor_set_print(traffic_monitor_print_fn); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index df2222a1806f..eebfc18cdcd2 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -7,6 +7,7 @@ #include <errno.h> #include <string.h> #include <assert.h> +#include <regex.h> #include <stdlib.h> #include <stdarg.h> #include <time.h> @@ -546,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester); test_loader_fini(&tester); \ }) +struct expect_msg { + const char *substr; /* substring match */ + regex_t regex; + bool is_regex; + bool on_next_line; + bool negative; +}; + +struct expected_msgs { + struct expect_msg *patterns; + size_t cnt; +}; + +void validate_msgs(const char *log_buf, struct expected_msgs *msgs, + void (*emit_fn)(const char *buf, bool force)); + #endif /* __TEST_PROGS_H */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index fd2da2234cc9..76568db7a664 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1372,7 +1372,7 @@ run: } else fprintf(stderr, "unknown test\n"); out: - /* Detatch and zero all the maps */ + /* Detach and zero all the maps */ bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS); for (i = 0; i < ARRAY_SIZE(links); i++) { diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 595194453ff8..35b4893ccdf8 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -15,20 +15,18 @@ #include <bpf/libbpf.h> #include <sys/ioctl.h> #include <linux/rtnetlink.h> -#include <signal.h> #include <linux/perf_event.h> -#include <linux/err.h> -#include "bpf_util.h" #include "cgroup_helpers.h" #include "test_tcpnotify.h" -#include "trace_helpers.h" #include "testing_helpers.h" #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) pthread_t tid; +static bool exit_thread; + int rx_callbacks; static void dummyfn(void *ctx, int cpu, void *data, __u32 size) @@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb) { int err; - while (1) { + while (!exit_thread) { err = perf_buffer__poll(pb, 100); if (err < 0 && err != -EINTR) { printf("failed perf_buffer__poll: %d\n", err); @@ -78,15 +76,10 @@ int main(int argc, char **argv) int error = EXIT_FAILURE; struct bpf_object *obj; char test_script[80]; - cpu_set_t cpuset; __u32 key = 0; libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; @@ -151,6 +144,13 @@ int main(int argc, char **argv) sleep(10); + exit_thread = true; + int ret = pthread_join(tid, NULL); + if (ret) { + printf("FAILED: pthread_join\n"); + goto err; + } + if (verify_result(&g)) { printf("FAILED: Wrong stats Expected %d calls, got %d\n", g.ncalls, rx_callbacks); diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 65aafe0003db..62db060298a4 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -241,4 +241,6 @@ done if [ $failures -eq 0 ]; then echo "All tests successful!" +else + exit 1 fi diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 5e9f16683be5..16eb37e5bad6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose) return 0; } -int load_module(const char *path, bool verbose) +static int __load_module(const char *path, const char *param_values, bool verbose) { int fd; @@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose) fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno); return -ENOENT; } - if (finit_module(fd, "", 0)) { + if (finit_module(fd, param_values, 0)) { fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno); close(fd); return -EINVAL; @@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose) return 0; } +int load_module_params(const char *path, const char *param_values, bool verbose) +{ + return __load_module(path, param_values, verbose); +} + +int load_module(const char *path, bool verbose) +{ + return __load_module(path, "", verbose); +} + int unload_bpf_testmod(bool verbose) { return unload_module("bpf_testmod", verbose); diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 46d7f7089f63..eb20d3772218 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -39,6 +39,7 @@ int kern_sync_rcu(void); int finit_module(int fd, const char *param_values, int flags); int delete_module(const char *name, int flags); int load_module(const char *path, bool verbose); +int load_module_params(const char *path, const char *param_values, bool verbose); int unload_module(const char *name, bool verbose); static inline __u64 get_time_ns(void) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 81943c6254e6..171987627f3a 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -17,7 +17,9 @@ #include <linux/limits.h> #include <libelf.h> #include <gelf.h> +#include "bpf/hashmap.h" #include "bpf/libbpf_internal.h" +#include "bpf_util.h" #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" @@ -519,3 +521,235 @@ void read_trace_pipe(void) { read_trace_pipe_iter(trace_pipe_cb, NULL, 0); } + +static size_t symbol_hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *) key); +} + +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *) key1, (const char *) key2) == 0; +} + +static bool is_invalid_entry(char *buf, bool kernel) +{ + if (kernel && strchr(buf, '[')) + return true; + if (!kernel && !strchr(buf, '[')) + return true; + return false; +} + +static const char * const trace_blacklist[] = { + "migrate_disable", + "migrate_enable", + "rcu_read_unlock_strict", + "preempt_count_add", + "preempt_count_sub", + "__rcu_read_lock", + "__rcu_read_unlock", + "bpf_get_numa_node_id", +}; + +static bool skip_entry(char *name) +{ + int i; + + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * default_idle and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + return true; + if (!strcmp(name, "default_idle")) + return true; + if (!strncmp(name, "rcu_", 4)) + return true; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + return true; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + return true; + + for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) { + if (!strcmp(name, trace_blacklist[i])) + return true; + } + + return false; +} + +/* Do comparison by ignoring '.llvm.<hash>' suffixes. */ +static int compare_name(const char *name1, const char *name2) +{ + const char *res1, *res2; + int len1, len2; + + res1 = strstr(name1, ".llvm."); + res2 = strstr(name2, ".llvm."); + len1 = res1 ? res1 - name1 : strlen(name1); + len2 = res2 ? res2 - name2 : strlen(name2); + + if (len1 == len2) + return strncmp(name1, name2, len1); + if (len1 < len2) + return strncmp(name1, name2, len1) <= 0 ? -1 : 1; + return strncmp(name1, name2, len2) >= 0 ? 1 : -1; +} + +static int load_kallsyms_compare(const void *p1, const void *p2) +{ + return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); +} + +static int search_kallsyms_compare(const void *p1, const struct ksym *p2) +{ + return compare_name(p1, p2->name); +} + +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) +{ + size_t cap = 0, cnt = 0; + char *name = NULL, *ksym_name, **syms = NULL; + struct hashmap *map; + struct ksyms *ksyms; + struct ksym *ks; + char buf[256]; + FILE *f; + int err = 0; + + ksyms = load_kallsyms_custom_local(load_kallsyms_compare); + if (!ksyms) + return -EINVAL; + + /* + * The available_filter_functions contains many duplicates, + * but other than that all symbols are usable to trace. + * Filtering out duplicates by using hashmap__add, which won't + * add existing entry. + */ + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + + if (!f) + return -EINVAL; + + map = hashmap__new(symbol_hash, symbol_equal, NULL); + if (IS_ERR(map)) { + err = libbpf_get_error(map); + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) + continue; + if (skip_entry(name)) + continue; + + ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); + if (!ks) { + err = -EINVAL; + goto error; + } + + ksym_name = ks->name; + err = hashmap__add(map, ksym_name, 0); + if (err == -EEXIST) { + err = 0; + continue; + } + if (err) + goto error; + + err = libbpf_ensure_mem((void **) &syms, &cap, + sizeof(*syms), cnt + 1); + if (err) + goto error; + + syms[cnt++] = ksym_name; + } + + *symsp = syms; + *cntp = cnt; + +error: + free(name); + fclose(f); + hashmap__free(map); + if (err) + free(syms); + return err; +} + +int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) +{ + unsigned long *addr, *addrs, *tmp_addrs; + int err = 0, max_cnt, inc_cnt; + char *name = NULL; + size_t cnt = 0; + char buf[256]; + FILE *f; + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); + + if (!f) + return -ENOENT; + + /* In my local setup, the number of entries is 50k+ so Let us initially + * allocate space to hold 64k entries. If 64k is not enough, incrementally + * increase 1k each time. + */ + max_cnt = 65536; + inc_cnt = 1024; + addrs = malloc(max_cnt * sizeof(long)); + if (addrs == NULL) { + err = -ENOMEM; + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) + continue; + if (skip_entry(name)) + continue; + + if (cnt == max_cnt) { + max_cnt += inc_cnt; + tmp_addrs = realloc(addrs, max_cnt); + if (!tmp_addrs) { + err = -ENOMEM; + goto error; + } + addrs = tmp_addrs; + } + + addrs[cnt++] = (unsigned long)addr; + } + + *addrsp = addrs; + *cntp = cnt; + +error: + free(name); + fclose(f); + if (err) + free(addrs); + return err; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 2ce873c9f9aa..9437bdd4afa5 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr); int read_build_id(const char *path, char *build_id, size_t size); +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel); +int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); + #endif diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h new file mode 100644 index 000000000000..549d1f774810 --- /dev/null +++ b/tools/testing/selftests/bpf/usdt.h @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: BSD-2-Clause +/* + * This single-header library defines a collection of variadic macros for + * defining and triggering USDTs (User Statically-Defined Tracepoints): + * + * - For USDTs without associated semaphore: + * USDT(group, name, args...) + * + * - For USDTs with implicit (transparent to the user) semaphore: + * USDT_WITH_SEMA(group, name, args...) + * USDT_IS_ACTIVE(group, name) + * + * - For USDTs with explicit (user-defined and provided) semaphore: + * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...) + * USDT_SEMA_IS_ACTIVE(sema) + * + * all of which emit a NOP instruction into the instruction stream, and so + * have *zero* overhead for the surrounding code. USDTs are identified by + * a combination of `group` and `name` identifiers, which is used by external + * tracing tooling (tracers) for identifying exact USDTs of interest. + * + * USDTs can have an associated (2-byte) activity counter (USDT semaphore), + * automatically maintained by Linux kernel whenever any correctly written + * BPF-based tracer is attached to the USDT. This USDT semaphore can be used + * to check whether there is a need to do any extra data collection and + * processing for a given USDT (if necessary), and otherwise avoid extra work + * for a common case of USDT not being traced ("active"). + * + * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or + * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on + * working with USDTs with implicitly or explicitly associated + * USDT semaphores, respectively. + * + * There is also some additional data recorded into an auxiliary note + * section. The data in the note section describes the operands, in terms of + * size and location, used by tracing tooling to know where to find USDT + * arguments. Each location is encoded as an assembler operand string. + * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert + * breakpoints on top of the nop, and decode the location operand-strings, + * like an assembler, to find the values being passed. + * + * The operand strings are selected by the compiler for each operand. + * They are constrained by inline-assembler codes.The default is: + * + * #define USDT_ARG_CONSTRAINT nor + * + * This is a good default if the operands tend to be integral and + * moderate in number (smaller than number of registers). In other + * cases, the compiler may report "'asm' requires impossible reload" or + * similar. In this case, consider simplifying the macro call (fewer + * and simpler operands), reduce optimization, or override the default + * constraints string via: + * + * #define USDT_ARG_CONSTRAINT g + * #include <usdt.h> + * + * For some historical description of USDT v3 format (the one used by this + * library and generally recognized and assumed by BPF-based tracing tools) + * see [0]. The more formal specification can be found at [1]. Additional + * argument constraints information can be found at [2]. + * + * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for + * this USDT library implementation. Current implementation differs *a lot* in + * terms of exposed user API and general usability, which was the main goal + * and focus of the reimplementation work. Nevertheless, underlying recorded + * USDT definitions are fully binary compatible and any USDT-based tooling + * should work equally well with USDTs defined by either SystemTap's or this + * library's USDT implementation. + * + * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html + * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h + */ +#ifndef __USDT_H +#define __USDT_H + +/* + * Changelog: + * + * 0.1.0 + * ----- + * - Initial release + */ +#define USDT_MAJOR_VERSION 0 +#define USDT_MINOR_VERSION 1 +#define USDT_PATCH_VERSION 0 + +/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L) +#define __usdt_va_opt 1 +#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__ +#else +#define __usdt_va_args(...) , ##__VA_ARGS__ +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well. No USDT semaphore is + * associated with this USDT. + * + * Such "semaphoreless" USDTs are commonly used when there is no extra data + * collection or processing needed to collect and prepare USDT arguments and + * they are just available in the surrounding code. USDT() macro will just + * record their locations in CPU registers or in memory for tracing tooling to + * be able to access them, if necessary. + */ +#ifdef __usdt_va_opt +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__) +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well. USDT also get an + * implicitly-defined associated USDT semaphore, which will be "activated" by + * tracing tooling and can be used to check whether USDT is being actively + * observed. + * + * USDTs with semaphore are commonly used when there is a need to perform + * additional data collection and processing to prepare USDT arguments, which + * otherwise might not be necessary for the rest of application logic. In such + * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT + * is not traced (which is presumed to be a common situation), the associated + * USDT semaphore is "inactive", and so there is no need to waste resources to + * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether + * USDT is "active". + * + * N.B. There is an inherent (albeit short) gap between checking whether USDT + * is active and triggering corresponding USDT, in which external tracer can + * be attached to an USDT and activate USDT semaphore after the activity check. + * If such a race occurs, tracers might miss one USDT execution. Tracers are + * expected to accommodate such possibility and this is expected to not be + * a problem for applications and tracers. + * + * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained + * within a single executable or shared library and is not shared outside + * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name + * identifier across executable and shared library, it will work and won't + * conflict, per se, but will define independent USDT semaphores, one for each + * shared library/executable in which USDT_WITH_SEMA(group, name) is used. + * That is, if you attach to this USDT in one shared library (or executable), + * then only USDT semaphore within that shared library (or executable) will be + * updated by the kernel, while other libraries (or executable) will not see + * activated USDT semaphore. In short, it's best to use unique USDT group:name + * identifiers across different shared libraries (and, equivalently, between + * executable and shared library). This is advanced consideration and is + * rarely (if ever) seen in practice, but just to avoid surprises this is + * called out here. (Static libraries become a part of final executable, once + * linked by linker, so the above considerations don't apply to them.) + */ +#ifdef __usdt_va_opt +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name) \ + __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name), \ + ##__VA_ARGS__) +#endif + +struct usdt_sema { volatile unsigned short active; }; + +/* + * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into the given USDT. It is assumed that USDT is triggered with + * USDT_WITH_SEMA() macro which will implicitly define associated USDT + * semaphore. (If one needs more control over USDT semaphore, see + * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.) + * + * N.B. Such checks are necessarily racy and speculative. Between checking + * whether USDT is active and triggering the USDT itself, tracer can be + * detached with no notification. This race should be extremely rare and worst + * case should result in one-time wasted extra data collection and processing. + */ +#define USDT_IS_ACTIVE(group, name) ({ \ + extern struct usdt_sema __usdt_sema_name(group, name) \ + __usdt_asm_name(__usdt_sema_name(group, name)); \ + __usdt_sema_implicit(__usdt_sema_name(group, name)); \ + __usdt_sema_name(group, name).active > 0; \ +}) + +/* + * APIs for working with user-defined explicit USDT semaphores. + * + * This is a less commonly used advanced API for use cases in which user needs + * an explicit control over (potentially shared across multiple USDTs) USDT + * semaphore instance. This can be used when there is a group of logically + * related USDTs that all need extra data collection and processing whenever + * any of a family of related USDTs are "activated" (i.e., traced). In such + * a case, all such related USDTs will be associated with the same shared USDT + * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be + * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra + * USDT semaphore identifier as an extra parameter. + */ + +/** + * Underlying C global variable name for user-defined USDT semaphore with + * `sema` identifier. Could be useful for debugging, but normally shouldn't be + * used explicitly. + */ +#define USDT_SEMA(sema) __usdt_sema_##sema + +/* + * Define storage for user-defined USDT semaphore `sema`. + * + * Should be used only once in non-header source file to let compiler allocate + * space for the semaphore variable. Just like with any other global variable. + * + * This macro can be used anywhere where global variable declaration is + * allowed. Just like with global variable definitions, there should be only + * one definition of user-defined USDT semaphore with given `sema` identifier, + * otherwise compiler or linker will complain about duplicate variable + * definition. + * + * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace + * and inside namespaces (including nested namespaces). Just make sure that + * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is + * referenced, or any of its parent namespaces, so the C++ language-level + * identifier is visible to the code that needs to reference the semaphore. + * At the lowest layer, USDT semaphores have global naming and visibility + * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked + * against from C or C++ code, if necessary). To keep it simple, putting + * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest + * no-brainer solution. All these aspects are irrelevant for plain C, because + * C doesn't have namespaces and everything is always in the global namespace. + * + * N.B. Due to USDT metadata being recorded in non-allocatable ELF note + * section, it has limitations when it comes to relocations, which, in + * practice, means that it's not possible to correctly share USDT semaphores + * between main executable and shared libraries, or even between multiple + * shared libraries. USDT semaphore has to be contained to individual shared + * library or executable to avoid unpleasant surprises with half-working USDT + * semaphores. We enforce this by marking semaphore ELF symbols as having + * a hidden visibility. This is quite an advanced use case and consideration + * and for most users this should have no consequences whatsoever. + */ +#define USDT_DEFINE_SEMA(sema) \ + struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \ + __usdt_asm_name(USDT_SEMA(sema)) \ + __attribute__((visibility("hidden"))) = { 0 } + +/* + * Declare extern reference to user-defined USDT semaphore `sema`. + * + * Refers to a variable defined in another compilation unit by + * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across + * multiple compilation units (i.e., .c and .cpp files). + * + * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities. + */ +#define USDT_DECLARE_SEMA(sema) \ + extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema)) + +/* + * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into USDT(s) associated with USDT semaphore `sema`. + * + * N.B. Such checks are necessarily racy. Between checking the state of USDT + * semaphore and triggering associated USDT(s), the active tracer might attach + * or detach. This race should be extremely rare and worst case should result + * in one-time missed USDT event or wasted extra data collection and + * processing. USDT-using tracers should be written with this in mind and is + * not a concern of the application defining USDTs with associated semaphore. + */ +#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0) + +/* + * Invoke USDT specified by `group` and `name` identifiers and associate + * explicitly user-defined semaphore `sema` with it. Pass through `args` as + * USDT arguments. `args` are optional and zero arguments are acceptable. + * + * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be + * checked whether active with USDT_SEMA_IS_ACTIVE(). + */ +#ifdef __usdt_va_opt +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__) +#else +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__) +#endif + +/* + * Adjustable implementation aspects + */ +#ifndef USDT_ARG_CONSTRAINT +#if defined __powerpc__ +#define USDT_ARG_CONSTRAINT nZr +#elif defined __arm__ +#define USDT_ARG_CONSTRAINT g +#elif defined __loongarch__ +#define USDT_ARG_CONSTRAINT nmr +#else +#define USDT_ARG_CONSTRAINT nor +#endif +#endif /* USDT_ARG_CONSTRAINT */ + +#ifndef USDT_NOP +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define USDT_NOP nop 0 +#else +#define USDT_NOP nop +#endif +#endif /* USDT_NOP */ + +/* + * Implementation details + */ +/* USDT name for implicitly-defined USDT semaphore, derived from group:name */ +#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name +/* ELF section into which USDT semaphores are put */ +#define __usdt_sema_sec __attribute__((section(".probes"))) + +#define __usdt_concat(a, b) a ## b +#define __usdt_apply(fn, n) __usdt_concat(fn, n) + +#ifndef __usdt_nth +#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N +#endif + +#ifndef __usdt_narg +#ifdef __usdt_va_opt +#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#else +#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#endif +#endif /* __usdt_narg */ + +#define __usdt_hash # +#define __usdt_str_(x) #x +#define __usdt_str(x) __usdt_str_(x) + +#ifndef __usdt_asm_name +#define __usdt_asm_name(name) __asm__(__usdt_str(name)) +#endif + +#define __usdt_asm0() "\n" +#define __usdt_asm1(x) __usdt_str(x) "\n" +#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__) +#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__) +#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__) +#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__) +#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__) +#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__) +#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__) +#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__) +#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__) +#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__) +#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__) +#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__) + +#ifdef __LP64__ +#define __usdt_asm_addr .8byte +#else +#define __usdt_asm_addr .4byte +#endif + +#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x) +#define __usdt_asm_strz(x) __usdt_asm_strz_(x) +#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x) +#define __usdt_asm_str(x) __usdt_asm_str_(x) + +/* "semaphoreless" USDT case */ +#ifndef __usdt_sema_none +#define __usdt_sema_none(sema) +#endif + +/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */ +#ifndef __usdt_sema_implicit +#define __usdt_sema_implicit(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm1(.ifndef sema) \ + __usdt_asm3( .pushsection .probes, "aw", "progbits") \ + __usdt_asm1( .weak sema) \ + __usdt_asm1( .hidden sema) \ + __usdt_asm1( .align 2) \ + __usdt_asm1(sema:) \ + __usdt_asm1( .zero 2) \ + __usdt_asm2( .type sema, @object) \ + __usdt_asm2( .size sema, 2) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + ); +#endif + +/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */ +#ifndef __usdt_sema_explicit +#define __usdt_sema_explicit(sema) \ + __asm__ __volatile__ ("" :: "m" (sema)); +#endif + +/* main USDT definition (nop and .note.stapsdt metadata) */ +#define __usdt_probe(group, name, sema_def, sema, ...) do { \ + sema_def(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm( 990: USDT_NOP) \ + __usdt_asm3( .pushsection .note.stapsdt, "", "note") \ + __usdt_asm1( .balign 4) \ + __usdt_asm3( .4byte 992f-991f,994f-993f,3) \ + __usdt_asm1(991: .asciz "stapsdt") \ + __usdt_asm1(992: .balign 4) \ + __usdt_asm1(993: __usdt_asm_addr 990b) \ + __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \ + __usdt_asm1( __usdt_asm_addr sema) \ + __usdt_asm_strz(group) \ + __usdt_asm_strz(name) \ + __usdt_asm_args(__VA_ARGS__) \ + __usdt_asm1( .ascii "\0") \ + __usdt_asm1(994: .balign 4) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.ifndef _.stapsdt.base) \ + __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\ + __usdt_asm1( .weak _.stapsdt.base) \ + __usdt_asm1( .hidden _.stapsdt.base) \ + __usdt_asm1(_.stapsdt.base:) \ + __usdt_asm1( .space 1) \ + __usdt_asm2( .size _.stapsdt.base, 1) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + :: __usdt_asm_ops(__VA_ARGS__) \ + ); \ +} while (0) + +/* + * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h + * operand note format. + * + * The named register may be a longer or shorter (!) alias for the + * storage where the value in question is found. For example, on + * i386, 64-bit value may be put in register pairs, and a register + * name stored would identify just one of them. Previously, gcc was + * asked to emit the %w[id] (16-bit alias of some registers holding + * operands), even when a wider 32-bit value was used. + * + * Bottom line: the byte-width given before the @ sign governs. If + * there is a mismatch between that width and that of the named + * register, then a sys/sdt.h note consumer may need to employ + * architecture-specific heuristics to figure out where the compiler + * has actually put the complete value. + */ +#if defined(__powerpc__) || defined(__powerpc64__) +#define __usdt_argref(id) %I[id]%[id] +#elif defined(__i386__) +#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */ +#else +#define __usdt_argref(id) %[id] +#endif + +#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \ + __usdt_asm1(.ascii "@") \ + __usdt_asm_str(__usdt_argref(__usdt_aval##n)) + +#define __usdt_asm_args0 /* no arguments */ +#define __usdt_asm_args1 __usdt_asm_arg(1) +#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2) +#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3) +#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4) +#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5) +#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6) +#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7) +#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8) +#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9) +#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10) +#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11) +#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12) +#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__)) + +#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5) +#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x)) + +/* + * We can't use __builtin_choose_expr() in C++, so fall back to table-based + * signedness determination for known types, utilizing templates magic. + */ +#ifdef __cplusplus + +#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed) + +#include <cstddef> + +template<typename T> struct __usdt_t { static const bool is_signed = false; }; +template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {}; +template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {}; + +#define __usdt_def_signed(T) \ +template<> struct __usdt_t<T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; } +#define __usdt_maybe_signed(T) \ +template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; } + +__usdt_def_signed(signed char); +__usdt_def_signed(short); +__usdt_def_signed(int); +__usdt_def_signed(long); +__usdt_def_signed(long long); +__usdt_maybe_signed(char); +__usdt_maybe_signed(wchar_t); + +#else /* !__cplusplus */ + +#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4) +#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U)) +#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1) + +#endif /* __cplusplus */ + +#define __usdt_asm_op(n, x) \ + [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \ + [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x) + +#define __usdt_asm_ops0() [__usdt_dummy] "g" (0) +#define __usdt_asm_ops1(x) __usdt_asm_op(1, x) +#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x) +#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x) +#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x) +#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x) +#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x) +#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x) +#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x) +#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x) +#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x) +#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x) +#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x) +#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__) + +#endif /* __USDT_H */ diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c index b616575c3b00..ce13002c7a19 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c +++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c @@ -93,7 +93,7 @@ .expected_attach_type = BPF_SK_LOOKUP, .result = VERBOSE_ACCEPT, .runs = -1, - .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\ + .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\ 2: (c5) if r0 s< 0x0 goto pc+2\ - R0_w=-44", + R0=-44", }, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index f3492efc8834..c8d640802cce 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1375,7 +1375,7 @@ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), /* write into map value */ @@ -1439,7 +1439,7 @@ /* second time with fp-16 */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), @@ -1493,7 +1493,7 @@ /* second time with fp-16 */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), @@ -2380,7 +2380,7 @@ */ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), - /* r9 = *r9 ; verifier get's to this point via two paths: + /* r9 = *r9 ; verifier gets to this point via two paths: * ; (I) one including r9 = r8, verified first; * ; (II) one excluding r9 = r8, verified next. * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id. diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index f2cac42298ba..09179fb551f0 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -32,7 +32,7 @@ usage() exit 1 } -setup() +genkey() { local tmp_dir="$1" @@ -45,9 +45,14 @@ setup() openssl x509 -in ${tmp_dir}/signing_key.pem -out \ ${tmp_dir}/signing_key.der -outform der +} - key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s) +setup() +{ + local tmp_dir="$1" + genkey "${tmp_dir}" + key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s) keyring_id=$(keyctl newring ebpf_testing_keyring @s) keyctl link $key_id $keyring_id } @@ -105,6 +110,8 @@ main() if [[ "${action}" == "setup" ]]; then setup "${tmp_dir}" + elif [[ "${action}" == "genkey" ]]; then + genkey "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" elif [[ "${action}" == "fsverity-create-sign" ]]; then diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index d532dd82a3a8..e962f133250c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -181,6 +181,12 @@ struct var_preset { bool applied; }; +enum dump_mode { + DUMP_NONE = 0, + DUMP_XLATED = 1, + DUMP_JITED = 2, +}; + static struct env { char **filenames; int filename_cnt; @@ -227,6 +233,7 @@ static struct env { char orig_cgroup[PATH_MAX]; char stat_cgroup[PATH_MAX]; int memory_peak_fd; + __u32 dump_mode; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -271,6 +278,7 @@ const char argp_program_doc[] = enum { OPT_LOG_FIXED = 1000, OPT_LOG_SIZE = 1001, + OPT_DUMP = 1002, }; static const struct argp_option opts[] = { @@ -295,6 +303,7 @@ static const struct argp_option opts[] = { "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, + { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" }, {}, }; @@ -427,6 +436,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case OPT_DUMP: + if (!arg || strcasecmp(arg, "xlated") == 0) { + env.dump_mode |= DUMP_XLATED; + } else if (strcasecmp(arg, "jited") == 0) { + env.dump_mode |= DUMP_JITED; + } else { + fprintf(stderr, "Unrecognized dump mode '%s'\n", arg); + return -EINVAL; + } + break; default: return ARGP_ERR_UNKNOWN; } @@ -1554,6 +1573,36 @@ static int parse_rvalue(const char *val, struct rvalue *rvalue) return 0; } +static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name) +{ + char command[64], buf[4096]; + FILE *fp; + int status; + + status = system("command -v bpftool > /dev/null 2>&1"); + if (status != 0) { + fprintf(stderr, "bpftool is not available, can't print program dump\n"); + return; + } + snprintf(command, sizeof(command), "bpftool prog dump %s id %u", + mode == DUMP_JITED ? "jited" : "xlated", prog_id); + fp = popen(command, "r"); + if (!fp) { + fprintf(stderr, "bpftool failed with error: %d\n", errno); + return; + } + + printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name); + while (fgets(buf, sizeof(buf), fp)) + fputs(buf, stdout); + fprintf(stdout, "\n"); + + if (ferror(fp)) + fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno); + + pclose(fp); +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); @@ -1630,8 +1679,13 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf memset(&info, 0, info_len); fd = bpf_program__fd(prog); - if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) + if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) { stats->stats[JITED_SIZE] = info.jited_prog_len; + if (env.dump_mode & DUMP_JITED) + dump(info.id, DUMP_JITED, base_filename, prog_name); + if (env.dump_mode & DUMP_XLATED) + dump(info.id, DUMP_XLATED, base_filename, prog_name); + } parse_verif_log(buf, buf_sz, stats); diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index 1503a1d2faa0..9ed8c796645d 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -155,7 +155,7 @@ int main(int argc, char **argv) } if (!server) { - /* Only supports IPv4; see hints initiailization above. */ + /* Only supports IPv4; see hints initialization above. */ if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) { fprintf(stderr, "Could not resolve %s\n", argv[optind]); return 1; diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 93c2cc413cfc..48729da142c2 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) /* Refresh the local tail pointer. * cached_cons is r->size bigger than the real consumer pointer so * that this addition can be avoided in the more frequently - * executed code that computs free_entries in the beginning of - * this function. Without this optimization it whould have been + * executed code that computes free_entries in the beginning of + * this function. Without this optimization it would have been * free_entries = r->cached_prod - r->cached_cons + r->size. */ r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index a29de0713f19..352adc8df2d1 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -2276,25 +2276,13 @@ static int testapp_xdp_metadata_copy(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; - struct bpf_map *data_map; - int count = 0; - int key = 0; test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, skel_tx->progs.xsk_xdp_populate_metadata, skel_rx->maps.xsk, skel_tx->maps.xsk); test->ifobj_rx->use_metadata = true; - data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); - if (!data_map || !bpf_map__is_internal(data_map)) { - ksft_print_msg("Error: could not find bss section of XDP program\n"); - return TEST_FAILURE; - } - - if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) { - ksft_print_msg("Error: could not update count element\n"); - return TEST_FAILURE; - } + skel_rx->bss->count = 0; return testapp_validate_traffic(test); } diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 0e89fcff4d05..44c52f620fda 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -522,6 +522,18 @@ int proc_mount_contains(const char *option) return strstr(buf, option) != NULL; } +int cgroup_feature(const char *feature) +{ + char buf[PAGE_SIZE]; + ssize_t read; + + read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); + if (read < 0) + return read; + + return strstr(buf, feature) != NULL; +} + ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index c69cab66254b..9dc90a1b386d 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -60,6 +60,7 @@ extern int cg_run_nowait(const char *cgroup, extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); int proc_mount_contains(const char *option); +int cgroup_feature(const char *feature); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); extern pid_t clone_into_cgroup(int cgroup_fd); diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8730645d363a..dfb763819581 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -804,6 +804,662 @@ cleanup: return ret; } +/* + * Get the current frozen_usec for the cgroup. + */ +static long cg_check_freezetime(const char *cgroup) +{ + return cg_read_key_long(cgroup, "cgroup.stat.local", + "frozen_usec "); +} + +/* + * Test that the freeze time will behave as expected for an empty cgroup. + */ +static int test_cgfreezer_time_empty(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_empty"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create an empty cgroup and check that its freeze time + * is 0. + */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 2) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 3) Unfreeze the cgroup. Check that the freeze time is + * larger than at 2). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 4) Check the freeze time again to ensure that it has not + * changed. + */ + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * A simple test for cgroup freezer time accounting. This test follows + * the same flow as test_cgfreezer_time_empty, but with a single process + * in the cgroup. + */ +static int test_cgfreezer_time_simple(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_simple"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create a cgroup and check that its freeze time is 0. + */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 2) Populate the cgroup with one child and check that the + * freeze time is still 0. + */ + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr > prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 3) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 4) Unfreeze the cgroup. Check that the freeze time is + * larger than at 3). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 5) Sleep for 1000 us. Check that the freeze time is the + * same as at 4). + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that freezer time accounting works as expected, even while we're + * populating a cgroup with processes. + */ +static int test_cgfreezer_time_populate(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + int i; + + cgroup = cg_name(root, "cg_time_test_populate"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 1) Populate the cgroup with 100 processes. Check that + * the freeze time is 0. + */ + for (i = 0; i < 100; i++) + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 2) Wait for the group to become fully populated. Check + * that the freeze time is 0. + */ + if (cg_wait_for_proc_count(cgroup, 100)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 3) Freeze the cgroup and then populate it with 100 more + * processes. Check that the freeze time continues to grow. + */ + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + for (i = 0; i < 100; i++) + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 4) Wait for the group to become fully populated. Check + * that the freeze time is larger than at 3). + */ + if (cg_wait_for_proc_count(cgroup, 200)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 5) Unfreeze the cgroup. Check that the freeze time is + * larger than at 4). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 6) Kill the processes. Check that the freeze time is the + * same as it was at 5). + */ + if (cg_killall(cgroup)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 7) Freeze and unfreeze the cgroup. Check that the freeze + * time is larger than it was at 6). + */ + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that frozen time for a cgroup continues to work as expected, + * even as processes are migrated. Frozen cgroup A's freeze time should + * continue to increase and running cgroup B's should stay 0. + */ +static int test_cgfreezer_time_migrate(const char *root) +{ + long prev_A, curr_A, curr_B; + char *cgroup[2] = {0}; + int ret = KSFT_FAIL; + int pid; + + cgroup[0] = cg_name(root, "cg_time_test_migrate_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(root, "cg_time_test_migrate_B"); + if (!cgroup[1]) + goto cleanup; + + if (cg_create(cgroup[0])) + goto cleanup; + + if (cg_check_freezetime(cgroup[0]) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(cgroup[1])) + goto cleanup; + + pid = cg_run_nowait(cgroup[0], child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup[0], 1)) + goto cleanup; + + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A) { + debug("Expect time (%ld) to be 0\n", curr_A); + goto cleanup; + } + curr_B = cg_check_freezetime(cgroup[1]); + if (curr_B) { + debug("Expect time (%ld) to be 0\n", curr_B); + goto cleanup; + } + + /* + * Freeze cgroup A. + */ + if (cg_freeze_wait(cgroup[0], true)) + goto cleanup; + prev_A = curr_A; + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A <= prev_A) { + debug("Expect time (%ld) to be > 0\n", curr_A); + goto cleanup; + } + + /* + * Migrate from A (frozen) to B (running). + */ + if (cg_enter(cgroup[1], pid)) + goto cleanup; + + usleep(1000); + curr_B = cg_check_freezetime(cgroup[1]); + if (curr_B) { + debug("Expect time (%ld) to be 0\n", curr_B); + goto cleanup; + } + + prev_A = curr_A; + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A <= prev_A) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr_A, prev_A); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup[0]) + cg_destroy(cgroup[0]); + free(cgroup[0]); + if (cgroup[1]) + cg_destroy(cgroup[1]); + free(cgroup[1]); + return ret; +} + +/* + * The test creates a cgroup and freezes it. Then it creates a child cgroup. + * After that it checks that the child cgroup has a non-zero freeze time + * that is less than the parent's. Next, it freezes the child, unfreezes + * the parent, and sleeps. Finally, it checks that the child's freeze + * time has grown larger than the parent's. + */ +static int test_cgfreezer_time_parent(const char *root) +{ + char *parent, *child = NULL; + int ret = KSFT_FAIL; + long ptime, ctime; + + parent = cg_name(root, "cg_test_parent_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_parent_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_check_freezetime(parent) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_freeze_wait(parent, true)) + goto cleanup; + + usleep(1000); + if (cg_create(child)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + /* + * Since the parent was frozen the entire time the child cgroup + * was being created, we expect the parent's freeze time to be + * larger than the child's. + * + * Ideally, we would be able to check both times simultaneously, + * but here we get the child's after we get the parent's. + */ + ptime = cg_check_freezetime(parent); + ctime = cg_check_freezetime(child); + if (ptime <= ctime) { + debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime); + goto cleanup; + } + + if (cg_freeze_nowait(child, true)) + goto cleanup; + + if (cg_freeze_wait(parent, false)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + usleep(100000); + + ctime = cg_check_freezetime(child); + ptime = cg_check_freezetime(parent); + + if (ctime <= ptime) { + debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* + * The test creates a parent cgroup and a child cgroup. Then, it freezes + * the child and checks that the child's freeze time is greater than the + * parent's, which should be zero. + */ +static int test_cgfreezer_time_child(const char *root) +{ + char *parent, *child = NULL; + int ret = KSFT_FAIL; + long ptime, ctime; + + parent = cg_name(root, "cg_test_child_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_child_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_check_freezetime(parent) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(child)) + goto cleanup; + + if (cg_freeze_wait(child, true)) + goto cleanup; + + ctime = cg_check_freezetime(child); + ptime = cg_check_freezetime(parent); + if (ptime != 0) { + debug("Expect ptime (%ld) to be 0\n", ptime); + goto cleanup; + } + + if (ctime <= ptime) { + debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* + * The test creates the following hierarchy: + * A + * | + * B + * | + * C + * + * Then it freezes the cgroups in the order C, B, A. + * Then it unfreezes the cgroups in the order A, B, C. + * Then it checks that C's freeze time is larger than B's and + * that B's is larger than A's. + */ +static int test_cgfreezer_time_nested(const char *root) +{ + char *cgroup[3] = {0}; + int ret = KSFT_FAIL; + long time[3] = {0}; + int i; + + cgroup[0] = cg_name(root, "cg_test_time_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(cgroup[0], "B"); + if (!cgroup[1]) + goto cleanup; + + cgroup[2] = cg_name(cgroup[1], "C"); + if (!cgroup[2]) + goto cleanup; + + if (cg_create(cgroup[0])) + goto cleanup; + + if (cg_check_freezetime(cgroup[0]) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(cgroup[1])) + goto cleanup; + + if (cg_create(cgroup[2])) + goto cleanup; + + if (cg_freeze_nowait(cgroup[2], true)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[1], true)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[0], true)) + goto cleanup; + + usleep(1000); + + if (cg_freeze_nowait(cgroup[0], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[1], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[2], false)) + goto cleanup; + + time[2] = cg_check_freezetime(cgroup[2]); + time[1] = cg_check_freezetime(cgroup[1]); + time[0] = cg_check_freezetime(cgroup[0]); + + if (time[2] <= time[1]) { + debug("Expect C's time (%ld) > B's time (%ld)", time[2], time[1]); + goto cleanup; + } + + if (time[1] <= time[0]) { + debug("Expect B's time (%ld) > A's time (%ld)", time[1], time[0]); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + for (i = 2; i >= 0 && cgroup[i]; i--) { + cg_destroy(cgroup[i]); + free(cgroup[i]); + } + + return ret; +} + #define T(x) { x, #x } struct cgfreezer_test { int (*fn)(const char *root); @@ -819,6 +1475,13 @@ struct cgfreezer_test { T(test_cgfreezer_stopped), T(test_cgfreezer_ptraced), T(test_cgfreezer_vfork), + T(test_cgfreezer_time_empty), + T(test_cgfreezer_time_simple), + T(test_cgfreezer_time_populate), + T(test_cgfreezer_time_migrate), + T(test_cgfreezer_time_parent), + T(test_cgfreezer_time_child), + T(test_cgfreezer_time_nested), }; #undef T diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c index 9ecb83c6cc5c..d8a1d1cd5007 100644 --- a/tools/testing/selftests/cgroup/test_pids.c +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -77,6 +77,9 @@ static int test_pids_events(const char *root) char *cg_parent = NULL, *cg_child = NULL; int pid; + if (cgroup_feature("pids_localevents") <= 0) + return KSFT_SKIP; + cg_parent = cg_name(root, "pids_parent"); cg_child = cg_name(cg_parent, "pids_child"); if (!cg_parent || !cg_child) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 9a3499827d4b..2180c328a825 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -5,6 +5,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py TEST_FILES += drgn_dump_damon_status.py +TEST_FILES += _common.sh # functionality tests TEST_PROGS += sysfs.sh @@ -18,6 +19,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py TEST_PROGS += sysfs_memcg_path_leak.sh +TEST_PROGS += sysfs_no_op_commit_break.py EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index a9f4e9aaf3a9..93f3a71bcfd4 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -9,7 +9,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <time.h> int main(int argc, char *argv[]) { diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py new file mode 100755 index 000000000000..2c65cffe6b54 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess +import sys + +import _damon_sysfs + +def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + + if rc != 0: + return None, f'drgn fail: return code({rc})' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + schemes=[_damon_sysfs.Damos( + ops_filters=[ + _damon_sysfs.DamosFilter( + type_='anon', + matching=True, + allow=True, + ) + ] + )], + )])] + ) + + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + before_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('before-commit status dump failed: %s' % err) + exit(1) + + kdamonds.kdamonds[0].commit() + + after_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('after-commit status dump failed: %s' % err) + exit(1) + + if before_commit_status != after_commit_status: + print(f'before: {json.dumps(before_commit_status, indent=2)}') + print(f'after: {json.dumps(after_commit_status, indent=2)}') + exit(1) + + kdamonds.stop() + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index d634d8395d90..585ecb4d5dc4 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only napi_id_helper +psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 984ece05f7f9..bd3af9a34e2f 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -19,6 +19,7 @@ TEST_PROGS := \ netcons_sysdata.sh \ netpoll_basic.py \ ping.py \ + psp.py \ queues.py \ stats.py \ shaper.py \ @@ -26,4 +27,13 @@ TEST_PROGS := \ xdp.py \ # end of TEST_PROGS +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := psp_responder +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../lib.mk + +# YNL build +YNL_GENS := psp + +include ../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 44b98f17f8ff..2f095cf67d9a 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -11,7 +11,9 @@ TEST_PROGS := \ bond_options.sh \ bond-eth-type-change.sh \ bond_macvlan_ipvlan.sh \ - bond_passive_lacp.sh + bond_passive_lacp.sh \ + bond_lacp_prio.sh + bond_ipsec_offload.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh new file mode 100755 index 000000000000..f09e100232c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPsec over bonding offload test: +# +# +----------------+ +# | bond0 | +# | | | +# | eth0 eth1 | +# +---+-------+----+ +# +# We use netdevsim instead of physical interfaces +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 192.0.2.1/24 dst 192.0.2.2/24 +# offload dev bond0 dir out +# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \ +# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 +# +#------------------------------------------------------------------- + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +srcip=192.0.2.1 +dstip=192.0.2.2 +ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec +ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec +active_slave="" + +# shellcheck disable=SC2317 +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave + + # shellcheck disable=SC2154 + new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ] +} + +test_offload() +{ + # use ping to exercise the Tx path + ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null + + active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + + if [ "$active_slave" = "$nic0" ]; then + sysfs=$ipsec0 + elif [ "$active_slave" = "$nic1" ]; then + sysfs=$ipsec1 + else + check_err 1 "bond_ipsec_offload invalid active_slave $active_slave" + fi + + # The tx/rx order in sysfs may changed after failover + grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs" + check_err $? "incorrect tx count with link ${active_slave}" + + log_test bond_ipsec_offload "active_slave ${active_slave}" +} + +setup_env() +{ + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + defer umount /sys/kernel/debug/ + + fi + + # setup netdevsim since dummy/veth dev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + if ! modprobe -q netdevsim; then + echo "SKIP: can't load netdevsim for ipsec offload" + # shellcheck disable=SC2154 + exit "$ksft_skip" + fi + defer modprobe -r netdevsim + fi + + setup_ns ns + defer cleanup_ns "$ns" +} + +setup_bond() +{ + ip -n "$ns" link add bond0 type bond mode active-backup miimon 100 + ip -n "$ns" addr add "$srcip/24" dev bond0 + ip -n "$ns" link set bond0 up + + echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null + nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1) + nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1) + ip -n "$ns" link set "$nic0" master bond0 + ip -n "$ns" link set "$nic1" master bond0 + + # we didn't create a peer, make sure we can Tx by adding a permanent + # neighbour this need to be added after enslave + ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55 + + # create offloaded SAs, both in and out + ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \ + tmpl proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \ + tmpl proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$srcip/24" dst "$dstip/24" \ + offload dev bond0 dir out + + ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$dstip/24" dst "$srcip/24" \ + offload dev bond0 dir in + + # does offload show up in ip output + lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir") + if [ "$lines" -ne 2 ] ; then + check_err 1 "bond_ipsec_offload SA offload missing from list output" + fi +} + +trap defer_scopes_cleanup EXIT +setup_env +setup_bond + +# start Offload testing +test_offload + +# do failover and re-test +ip -n "$ns" link set "$active_slave" down +slowwait 5 active_slave_changed "$active_slave" +test_offload + +# make sure offload get removed from driver +ip -n "$ns" x s flush +ip -n "$ns" x p flush +line0=$(grep -c "SA count=0" "$ipsec0") +line1=$(grep -c "SA count=0" "$ipsec1") +[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ] +check_fail $? "bond_ipsec_offload SA not removed from driver" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 1000 ] && RET=1 + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 10 ] && RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 + + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 7bc148889ca7..187b478d0ddf 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -7,6 +7,8 @@ ALL_TESTS=" prio arp_validate num_grat_arp + fail_over_mac + vlan_over_bond " lib_dir=$(dirname "$0") @@ -352,8 +354,8 @@ garp_test() exp_num=$(echo "${param}" | cut -f6 -d ' ') active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") - slowwait_for_counter $((exp_num + 5)) $exp_num \ - tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" + slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \ + "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null # check result real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") @@ -376,6 +378,197 @@ num_grat_arp() done } +check_all_mac_same() +{ + RET=0 + # all slaves should have same mac address (with the first port's mac) + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]') + local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \ + [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_first() +{ + RET=0 + # bond mac address should be same with the first added slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + if [ "$bond_mac" != "${mac[0]}" ]; then + RET=1 + fi +} + +check_bond_mac_same_with_active() +{ + RET=0 + # bond mac address should be same with active slave + local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]') + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]') + if [ "$bond_mac" != "$active_slave_mac" ]; then + RET=1 + fi +} + +check_backup_slave_mac_not_change() +{ + RET=0 + # backup slave's mac address is not changed + if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[] + | select(.linkinfo.info_slave_data.state=="BACKUP") + | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then + RET=1 + fi +} + +check_backup_slave_mac_inherit() +{ + local backup_mac + RET=0 + + # backup slaves should use mac[1] or mac[2] + local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \ + jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address') + for backup_mac in $backup_macs; do + if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then + RET=1 + fi + done +} + +check_first_slave_random_mac() +{ + RET=0 + # remove the first added slave and added it back + ip -n "$s_ns" link set eth0 nomaster + ip -n "$s_ns" link set eth0 master bond0 + + # the first slave should use random mac address + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" = "${mac[0]}" ] && RET=1 + log_test "bond fail_over_mac follow" "random first slave mac" + + # remove the first slave, the permanent MAC address should be restored back + ip -n "$s_ns" link set eth0 nomaster + eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]') + [ "$eth0_mac" != "${mac[0]}" ] && RET=1 +} + +do_active_backup_failover() +{ + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + ip -n ${s_ns} link set ${active_slave} down + slowwait 2 active_slave_changed $active_slave + ip -n ${s_ns} link set ${active_slave} up +} + +fail_over_mac() +{ + # Bring down the first interface on the switch to force the bond to + # select another active interface instead of the first one that joined. + ip -n "$g_ns" link set s0 down + + # fail_over_mac none + bond_reset "mode active-backup miimon 100 fail_over_mac 0" + check_all_mac_same + log_test "fail_over_mac 0" "all slaves have same mac" + do_active_backup_failover + check_all_mac_same + log_test "fail_over_mac 0" "failover: all slaves have same mac" + + # fail_over_mac active + bond_reset "mode active-backup miimon 100 fail_over_mac 1" + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "backup slave mac is not changed" + do_active_backup_failover + check_bond_mac_same_with_active + log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_not_change + log_test "fail_over_mac 1" "failover: backup slave mac is not changed" + + # fail_over_mac follow + bond_reset "mode active-backup miimon 100 fail_over_mac 2" + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "backup slave mac inherit" + do_active_backup_failover + check_bond_mac_same_with_first + log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac" + check_bond_mac_same_with_active + log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac" + check_backup_slave_mac_inherit + log_test "fail_over_mac 2" "failover: backup slave mac inherit" + check_first_slave_random_mac + log_test "fail_over_mac 2" "first slave mac random" +} + +vlan_over_bond_arp() +{ + local mode="$1" + RET=0 + + bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond arp" "$mode" +} + +vlan_over_bond_ns() +{ + local mode="$1" + RET=0 + + if skip_ns; then + log_test_skip "vlan_over_bond ns" "$mode" + return 0 + fi + + bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10" + ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3 + ip -n "${s_ns}" link set bond0.3 up + ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3 + ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3 + + slowwait_for_counter 5 5 tc_rule_handle_stats_get \ + "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1 + log_test "vlan over bond ns" "$mode" +} + +vlan_over_bond() +{ + # add vlan 3 for client + ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3 + ip -n "${c_ns}" link set eth0.3 up + ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3 + ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3 + + # Add tc rule to check the vlan pkts + tc -n "${c_ns}" qdisc add dev eth0.3 clsact + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \ + handle 101 flower skip_hw arp_op request \ + arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass + tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \ + handle 102 flower skip_hw ip_proto icmpv6 \ + type 135 src_ip 2001:db8::3:1 action pass + + vlan_over_bond_arp "active-backup" + vlan_over_bond_ns "active-backup" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh index 195ef83cfbf1..167aa4a4a12a 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -39,6 +39,8 @@ g_ip4="192.0.2.254" s_ip6="2001:db8::1" c_ip6="2001:db8::10" g_ip6="2001:db8::254" +mac[0]="00:0a:0b:0c:0d:01" +mac[1]="00:0a:0b:0c:0d:02" gateway_create() { @@ -62,6 +64,7 @@ server_create() for i in $(seq 0 1); do ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" ip -n ${g_ns} link set s${i} up ip -n ${g_ns} link set s${i} master br0 diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh index 3a1333d9a85b..23a2932301cc 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh @@ -26,6 +26,7 @@ # +-------------------------------------+ source bond_topo_2d1c.sh +mac[2]="00:0a:0b:0c:0d:03" setup_prepare() { @@ -36,6 +37,7 @@ setup_prepare() # Add the extra device as we use 3 down links for bond0 local i=2 ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}" ip -n ${g_ns} link set s${i} up ip -n ${g_ns} link set s${i} master br0 ip -n ${s_ns} link set eth${i} master bond0 diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 4d16a69ffc65..e5b7a8db4dfa 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -10,3 +10,8 @@ CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y +CONFIG_VLAN_8021Q=m +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y +CONFIG_XFRM_USER=m +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index f27172ddee0a..601431248d5b 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,6 +1,9 @@ +CONFIG_CONFIGFS_FS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_INET_PSP=y CONFIG_IPV6=y CONFIG_NETDEVSIM=m -CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7c90a040ce45..a2011474e625 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -3,6 +3,7 @@ import errno import os +from typing import Union from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv @@ -58,7 +59,39 @@ def get_hds_thresh(cfg, netnl) -> None: if 'hds-thresh' not in rings: raise KsftSkipEx('hds-thresh not supported by device') + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) except NlError as e: @@ -76,6 +109,7 @@ def set_hds_enable(cfg, netnl) -> None: ksft_eq('enabled', rings['tcp-data-split']) def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) except NlError as e: @@ -93,6 +127,7 @@ def set_hds_disable(cfg, netnl) -> None: ksft_eq('disabled', rings['tcp-data-split']) def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) except NlError as e: @@ -110,6 +145,7 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -140,6 +176,7 @@ def set_hds_thresh_random(cfg, netnl) -> None: ksft_eq(hds_thresh, rings['hds-thresh']) def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -157,6 +194,7 @@ def set_hds_thresh_max(cfg, netnl) -> None: ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -178,6 +216,7 @@ def set_xdp(cfg, netnl) -> None: """ mode = _get_hds_mode(cfg, netnl) if mode == 'enabled': + _defer_reset_hds(cfg, netnl) netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'unknown'}) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index fdc97355588c..ee09a40d532c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,9 +15,11 @@ TEST_PROGS = \ iou-zcrx.py \ irq.py \ loopback.sh \ + nic_timestamp.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_flow_label.py \ rss_input_xfrm.py \ tso.py \ xsk_reconfig.py \ diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 88ae719e6f8f..e8a06aa1471c 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,5 +1,7 @@ +CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py index cd23af875317..3e3a89a34afe 100755 --- a/tools/testing/selftests/drivers/net/hw/csum.py +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None): ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}" rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" - tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}" with bkg(rx_cmd, exit_wait=True): wait_port_listen(34000, proto="udp") @@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None): if extra_args != "-U -Z": extra_args += " -r 1" - rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}" tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" with bkg(rx_cmd, host=cfg.remote, exit_wait=True): diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index baa2f24240ba..45c2d49d55b6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,7 +24,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: @@ -42,9 +42,9 @@ def check_tx(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") @@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 1462a339a74b..0ceb297e7757 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -13,7 +13,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ rand_port, tool, wait_port_listen @@ -22,7 +22,7 @@ try: from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from net.lib.py import NetNSEnter from drivers.net.lib.py import GenerateTraffic from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 72f828021f83..3288ed04ce08 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -39,6 +39,7 @@ #define __EXPORTED_HEADERS__ #include <linux/uio.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -97,6 +98,10 @@ static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + struct memory_buffer { int fd; size_t size; @@ -115,6 +120,21 @@ struct memory_provider { size_t off, int n); }; +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; @@ -123,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size) ctx = malloc(sizeof(*ctx)); if (!ctx) - error(1, ENOMEM, "malloc failed"); + return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); - if (ctx->devfd < 0) - error(1, errno, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (ctx->memfd < 0) - error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } ret = ftruncate(ctx->memfd, size); - if (ret == -1) - error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } memset(&create, 0, sizeof(create)); @@ -151,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size) create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); - if (ctx->fd < 0) - error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); - if (ctx->buf_mem == MAP_FAILED) - error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; } static void udmabuf_free(struct memory_buffer *ctx) @@ -217,7 +257,7 @@ static void print_nonzero_bytes(void *ptr, size_t size) putchar(p[i]); } -void validate_buffer(void *line, size_t size) +int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; @@ -232,8 +272,10 @@ void validate_buffer(void *line, size_t size) "Failed validation: expected=%u, actual=%u, index=%lu\n", expected, ptr[i], i); errors++; - if (errors > 20) - error(1, 0, "validation failed."); + if (errors > 20) { + pr_err("validation failed"); + return -1; + } } seed++; if (seed == do_validation) @@ -241,6 +283,86 @@ void validate_buffer(void *line, size_t size) } fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; } static int rxq_num(int ifindex) @@ -270,29 +392,17 @@ static int rxq_num(int ifindex) return num; } -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - fprintf(stderr, "Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) +static void reset_flow_steering(void) { - /* Depending on the NIC, toggling ntuple off and on might not - * be allowed. Additionally, attempting to delete existing filters - * will fail if no filters are present. Therefore, do not enforce - * the exit status. - */ - - run_command("sudo ethtool -K %s ntuple off >&2", ifname); - run_command("sudo ethtool -K %s ntuple on >&2", ifname); - run_command( - "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", - ifname, ifname); - return 0; + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } } static const char *tcp_data_split_str(int val) @@ -309,7 +419,81 @@ static const char *tcp_data_split_str(int val) } } -static int configure_headersplit(bool on) +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; @@ -326,8 +510,15 @@ static int configure_headersplit(bool on) req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); - /* 0 - off, 1 - auto, 2 - on */ - ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL failed: %s\n", ys->err.msg); @@ -351,12 +542,103 @@ static int configure_headersplit(bool on) static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); } -static int configure_channels(unsigned int rx, unsigned int tx) +static int check_changing_channels(unsigned int rx, unsigned int tx) { - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -364,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) const char *type = "tcp6"; const char *server_addr; char buf[40]; + int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; @@ -374,23 +657,22 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) } /* Try configure 5-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", - ifname, - type, - client_ip ? "src-ip" : "", - client_ip ?: "", - server_addr, - client_ip ? "src-port" : "", - client_ip ? port : "", - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { /* If that fails, try configure 3-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", - ifname, - type, - server_addr, - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) /* If that fails, return error */ return -1; + } return 0; } @@ -405,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { + netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } @@ -483,18 +766,24 @@ err_close: return -1; } -static void enable_reuseaddr(int fd) +static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) @@ -537,6 +826,7 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { + struct ethtool_rings_get_rsp *ring_config; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; @@ -548,54 +838,72 @@ static int do_server(struct memory_buffer *mem) char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; + int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to enable TCP header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } tmp_mem = malloc(mem->size); if (!tmp_mem) - error(1, ENOMEM, "malloc failed"); + goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } ret = listen(socket_fd, 1); - if (ret) - error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } client_addr_len = sizeof(client_addr); @@ -604,6 +912,10 @@ static int do_server(struct memory_buffer *mem) fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); @@ -631,10 +943,15 @@ static int do_server(struct memory_buffer *mem) continue; if (ret < 0) { perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } continue; } if (ret == 0) { - fprintf(stderr, "client exited\n"); + errno = 0; + pr_err("client exited"); goto cleanup; } @@ -672,9 +989,10 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; @@ -685,22 +1003,27 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); - if (do_validation) - validate_buffer(tmp_mem, - dmabuf_cmsg->frag_size); - else + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); + } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; } - if (!is_devmem) - error(1, 0, "flow steering error\n"); fprintf(stderr, "total_received=%lu\n", total_received); } @@ -711,54 +1034,121 @@ static int do_server(struct memory_buffer *mem) page_aligned_frags, non_page_aligned_frags); cleanup: + err = 0; - free(tmp_mem); +err_close_client: close(client_fd); +err_close_socket: close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: ynl_sock_destroy(ys); - - return 0; +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; } -void run_devmem_tests(void) +int run_devmem_tests(void) { + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; + int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } + + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (!bind_rx_queue(ifindex, mem->fd, - calloc(num_queues, sizeof(struct netdev_queue_id)), - num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } - if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); + err = 0; + goto err_unbind; +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: provider->free(mem); + return err; } static uint64_t gettimeofday_ms(void) @@ -778,13 +1168,15 @@ static int do_poll(int fd) pfd.fd = fd; ret = poll(&pfd, 1, waittime_ms); - if (ret == -1) - error(1, errno, "poll"); + if (ret == -1) { + pr_err("poll"); + return -1; + } return ret && (pfd.revents & POLLERR); } -static void wait_compl(int fd) +static int wait_compl(int fd) { int64_t tstop = gettimeofday_ms() + waittime_ms; char control[CMSG_SPACE(100)] = {}; @@ -798,18 +1190,23 @@ static void wait_compl(int fd) msg.msg_controllen = sizeof(control); while (gettimeofday_ms() < tstop) { - if (!do_poll(fd)) + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) continue; ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret < 0) { if (errno == EAGAIN) continue; - error(1, errno, "recvmsg(MSG_ERRQUEUE)"); - return; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; } - if (msg.msg_flags & MSG_CTRUNC) - error(1, 0, "MSG_CTRUNC\n"); for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_IP && @@ -823,20 +1220,25 @@ static void wait_compl(int fd) continue; serr = (void *)CMSG_DATA(cm); - if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) - error(1, 0, "wrong origin %u", serr->ee_origin); - if (serr->ee_errno != 0) - error(1, 0, "wrong errno %d", serr->ee_errno); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } hi = serr->ee_data; lo = serr->ee_info; fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); - return; + return 0; } } - error(1, 0, "did not receive tx completion"); + pr_err("did not receive tx completion"); + return -1; } static int do_client(struct memory_buffer *mem) @@ -850,50 +1252,69 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; + int ret, err = -1; size_t len = 0; int socket_fd; __u32 ddmabuf; int opt = 1; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, socket_fd, "create socket"); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); - if (ret) - error(1, errno, "bindtodevice"); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } - if (bind_tx_queue(ifindex, mem->fd, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } if (client_ip) { - ret = parse_address(client_ip, atoi(port), &client_sin); - if (ret < 0) - error(1, 0, "parse client address"); - ret = bind(socket_fd, &client_sin, sizeof(client_sin)); - if (ret) - error(1, errno, "bind"); + if (ret) { + pr_err("bind"); + goto err_unbind; + } } ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); - if (ret) - error(1, errno, "set sock opt"); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, ntohs(server_sin.sin6_port), ifname); ret = connect(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "connect"); + if (ret) { + pr_err("connect"); + goto err_unbind; + } while (1) { free(line); @@ -906,10 +1327,11 @@ static int do_client(struct memory_buffer *mem) if (max_chunk) { msg.msg_iovlen = (line_size + max_chunk - 1) / max_chunk; - if (msg.msg_iovlen > MAX_IOV) - error(1, 0, - "can't partition %zd bytes into maximum of %d chunks", - line_size, MAX_IOV); + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } for (int i = 0; i < msg.msg_iovlen; i++) { iov[i].iov_base = (void *)(i * max_chunk); @@ -940,34 +1362,40 @@ static int do_client(struct memory_buffer *mem) *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); - if (ret < 0) - error(1, errno, "Failed sendmsg"); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } fprintf(stderr, "sendmsg_ret=%d\n", ret); - if (ret != line_size) - error(1, errno, "Did not send all bytes %d vs %zd", ret, - line_size); + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } - wait_compl(socket_fd); + if (wait_compl(socket_fd)) + goto err_free_line; } fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + err = 0; + +err_free_line: free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: close(socket_fd); - - if (ys) - ynl_sock_destroy(ys); - - return 0; + return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; - int ret; + int ret, err = 1; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { @@ -1004,8 +1432,10 @@ int main(int argc, char *argv[]) } } - if (!ifname) - error(1, 0, "Missing -f argument\n"); + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } ifindex = if_nametoindex(ifname); @@ -1014,33 +1444,41 @@ int main(int argc, char *argv[]) if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 0) - error(1, 0, "couldn't detect number of queues\n"); - if (num_queues < 2) - error(1, 0, - "number of device queues is too low\n"); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } - if (start_queue < 0 || num_queues < 0) - error(1, 0, "Both -t and -q are required\n"); + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } - run_devmem_tests(); - return 0; + return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 2) - error(1, 0, "number of device queues is too low\n"); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; - if (start_queue < 0) - error(1, 0, "couldn't detect number of queues\n"); + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } @@ -1048,21 +1486,39 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - if (start_queue < 0) - error(1, 0, "Missing -t argument\n"); + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } - if (num_queues < 0) - error(1, 0, "Missing -q argument\n"); + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } - if (!server_ip) - error(1, 0, "Missing -s argument\n"); + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } - if (!port) - error(1, 0, "Missing -p argument\n"); + if (!port) { + pr_err("Missing -p argument"); + return 1; + } mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + ret = is_server ? do_server(mem) : do_client(mem); - provider->free(mem); + if (ret) + goto err_free_mem; - return ret; + err = 0; + +err_free_mem: + provider->free(mem); + return err; } diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c1e943d53f19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to configuration of HW timestamping +""" + +import errno +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def test_hwtstamp_tx(cfg): + """ + Test TX timestamp configuration. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx(cfg): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 7bb552f8b182..ed7e405682f0 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -118,7 +118,7 @@ def test_rss_key_indir(cfg): qcnt = len(_get_rx_cnts(cfg)) if qcnt < 3: - KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -178,8 +178,13 @@ def test_rss_key_indir(cfg): cnts = _get_rx_cnts(cfg) GenerateTraffic(cfg).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - # First two queues get less traffic than all the rest - ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) def test_rss_queue_reconfigure(cfg, main_ctx=True): @@ -335,19 +340,20 @@ def test_hitless_key_update(cfg): data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_lt((t1 - t0).total_seconds(), 0.15) ksft_eq(errors1 - errors1, 0) ksft_eq(carrier1 - carrier0, 0) diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index c13dd5efa27a..0998e68ebaf0 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): sock_wait_drain(sock) qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - # No math behind the 10 here, but try to catch cases where - # TCP falls back to non-LSO. - ksft_lt(tcp_sock_get_retrans(sock), 10) - sock.close() - # Check that at least 90% of the data was sent as LSO packets. # System noise may cause false negatives. Also header overheads # will add up to 5% of extra packes... The check is best effort. total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + if should_lso: if cfg.have_stat_super_count: ksft_ge(qstat_new['tx-hw-gso-packets'] - diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8711c67ad658..2a645415c4ca 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -12,16 +12,16 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen + fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1b8bd648048f..01be3d9b9720 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,7 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -25,6 +25,9 @@ class NetDrvEnvBase: self.env = self._load_env_file() + # Following attrs must be set be inheriting classes + self.dev = None + def _load_env_file(self): env = os.environ.copy() @@ -48,6 +51,22 @@ class NetDrvEnvBase: env[pair[0]] = pair[1] return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + class NetDrvEnv(NetDrvEnvBase): """ @@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase): self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Can't resolve remote interface name, multiple interfaces match") return v6[0]["ifname"] if v6 else v4[0]["ifname"] - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -246,6 +245,10 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") + def require_nsim(self): + if self._ns is None: + raise KsftXfailEx("Test only works on netdevsim") + def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) if cached.get(key) is None: diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index b6071e80ebbb..8e1085e89647 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -148,12 +148,20 @@ function create_dynamic_target() { # Generate the command line argument for netconsole following: # netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') SRCPORT="1514" TGTPORT="6666" - echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 29a672c2270f..e212ad8ccef6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -44,8 +44,7 @@ source $lib_dir/devlink_lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 - defer simple_if_fini $h1 192.0.2.1/24 + adf_simple_if_init $h1 192.0.2.1/24 mtu_set $h1 10000 defer mtu_restore $h1 @@ -56,8 +55,7 @@ h1_create() h2_create() { - simple_if_init $h2 198.51.100.1/24 - defer simple_if_fini $h2 198.51.100.1/24 + adf_simple_if_init $h2 198.51.100.1/24 mtu_set $h2 10000 defer mtu_restore $h2 @@ -106,8 +104,7 @@ setup_prepare() # Reload to ensure devlink-trap settings are back to default. defer devlink_reload - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index d5b6f2cc9a29..9ca340c5f3a6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -57,8 +57,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 10000 defer mtu_restore $h1 @@ -70,8 +69,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -83,8 +81,7 @@ h2_create() h3_create() { - simple_if_init $h3 - defer simple_if_fini $h3 + adf_simple_if_init $h3 mtu_set $h3 10000 defer mtu_restore $h3 @@ -225,8 +222,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 2b5d2c2751d5..a4a25637fe2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -68,8 +68,7 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 defer vlan_destroy $h1 111 @@ -78,8 +77,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 defer vlan_destroy $h2 111 @@ -178,8 +176,7 @@ setup_prepare() h2mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index cd4a5c21360c..d8f8ae8533cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -72,8 +72,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 192.0.2.65/28 - defer simple_if_fini $h1 192.0.2.65/28 + adf_simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -81,8 +80,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -94,8 +92,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.66/28 - defer simple_if_fini $h3 192.0.2.66/28 + adf_simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -196,8 +193,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 537d6baa77b7..47d2ffcf366e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -100,8 +100,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -250,8 +249,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index 9699a100a87d..f4be72b2145a 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -24,7 +24,8 @@ def _assert_napi_threaded_disabled(nl, napi_id) -> None: def _set_threaded_state(cfg, threaded) -> None: - cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) def _setup_deferred_cleanup(cfg) -> None: @@ -38,6 +39,34 @@ def _setup_deferred_cleanup(cfg) -> None: return combined +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ Test that when napi threaded is enabled at device level and @@ -103,7 +132,8 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=2) as cfg: - ksft_run([change_num_queues, + ksft_run([napi_init, + change_num_queues, enable_dev_threaded_disable_napi_threaded], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh index ad2fb8b1c463..d1d23dc67f99 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -19,9 +19,6 @@ check_netconsole_module modprobe netdevsim 2> /dev/null || true rmmod netconsole 2> /dev/null || true -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - # Check for basic system dependency and exit if not found # check_for_dependencies # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) @@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk trap do_cleanup EXIT # Create one namespace and two interfaces set_network -# Create the command line for netconsole, with the configuration from the -# function above -CMDLINE="$(create_cmdline_str)" - -# Load the module, with the cmdline set -modprobe netconsole "${CMDLINE}" - -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" -# Make sure the message was received in the dst part -# and exit -validate_msg "${OUTPUT_FILE}" + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py new file mode 100755 index 000000000000..4ae7a785ff10 --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp.py @@ -0,0 +1,627 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test suite for PSP capable drivers.""" + +import errno +import fcntl +import socket +import struct +import termios +import time + +from lib.py import defer +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises +from lib.py import ksft_not_none +from lib.py import KsftSkipEx +from lib.py import NetDrvEpEnv, PSPFamily, NlError +from lib.py import bkg, rand_port, wait_port_listen + + +def _get_outq(s): + one = b'\0' * 4 + outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one) + return struct.unpack("I", outq)[0] + + +def _send_with_ack(cfg, msg): + cfg.comm_sock.send(msg) + response = cfg.comm_sock.recv(4) + if response != b'ack\0': + raise RuntimeError("Unexpected server response", response) + + +def _remote_read_len(cfg): + cfg.comm_sock.send(b'read len\0') + return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + + +def _make_clr_conn(cfg, ipver=None): + _send_with_ack(cfg, b'conn clr\0') + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _make_psp_conn(cfg, version=0, ipver=None): + _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version)) + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _close_conn(cfg, s): + _send_with_ack(cfg, b'data close\0') + s.close() + + +def _close_psp_conn(cfg, s): + _close_conn(cfg, s) + + +def _spi_xchg(s, rx): + s.send(struct.pack('I', rx['spi']) + rx['key']) + tx = s.recv(4 + len(rx['key'])) + return { + 'spi': struct.unpack('I', tx[:4])[0], + 'key': tx[4:] + } + + +def _send_careful(cfg, s, rounds): + data = b'0123456789' * 200 + for i in range(rounds): + n = 0 + for _ in range(10): # allow 10 retries + try: + n += s.send(data[n:], socket.MSG_DONTWAIT) + if n == len(data): + break + except BlockingIOError: + time.sleep(0.05) + else: + rlen = _remote_read_len(cfg) + outq = _get_outq(s) + report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}' + raise RuntimeError(report) + + return len(data) * rounds + + +def _check_data_rx(cfg, exp_len): + read_len = -1 + for _ in range(30): + cfg.comm_sock.send(b'read len\0') + read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + if read_len == exp_len: + break + time.sleep(0.01) + ksft_eq(read_len, exp_len) + + +def _check_data_outq(s, exp_len, force_wait=False): + outq = 0 + for _ in range(10): + outq = _get_outq(s) + if not force_wait and outq == exp_len: + break + time.sleep(0.01) + ksft_eq(outq, exp_len) + +# +# Test case boiler plate +# + +def _init_psp_dev(cfg): + if not hasattr(cfg, 'psp_dev_id'): + # Figure out which local device we are testing against + for dev in cfg.pspnl.dev_get({}, dump=True): + if dev['ifindex'] == cfg.ifindex: + cfg.psp_info = dev + cfg.psp_dev_id = cfg.psp_info['id'] + break + else: + raise KsftSkipEx("No PSP devices found") + + # Enable PSP if necessary + cap = cfg.psp_info['psp-versions-cap'] + ena = cfg.psp_info['psp-versions-ena'] + if cap != ena: + cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap}) + defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id, + 'psp-versions-ena': ena }) + +# +# Test cases +# + +def dev_list_devices(cfg): + """ Dump all devices """ + _init_psp_dev(cfg) + + devices = cfg.pspnl.dev_get({}, dump=True) + + found = False + for dev in devices: + found |= dev['id'] == cfg.psp_dev_id + ksft_true(found) + + +def dev_get_device(cfg): + """ Get the device we intend to use """ + _init_psp_dev(cfg) + + dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id}) + ksft_eq(dev['id'], cfg.psp_dev_id) + + +def dev_get_device_bad(cfg): + """ Test getting device which doesn't exist """ + raised = False + try: + cfg.pspnl.dev_get({'id': 1234567}) + except NlError as e: + ksft_eq(e.nl_msg.error, -errno.ENODEV) + raised = True + ksft_true(raised) + + +def dev_rotate(cfg): + """ Test key rotation """ + _init_psp_dev(cfg) + + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + + +def dev_rotate_spi(cfg): + """ Test key rotation and SPI check """ + _init_psp_dev(cfg) + + top_a = top_b = 0 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc_a = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_a = assoc_a['rx-key']['spi'] >> 31 + s.close() + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + ksft_eq(rot['id'], cfg.psp_dev_id) + assoc_b = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_b = assoc_b['rx-key']['spi'] >> 31 + s.close() + ksft_ne(top_a, top_b) + + +def assoc_basic(cfg): + """ Test creating associations """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(assoc), 0) + s.close() + + +def assoc_bad_dev(cfg): + """ Test creating associations with bad device ID """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV) + + +def assoc_sk_only_conn(cfg): + """ Test creating associations based on socket """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_mismatch_tx(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_unconn(cfg): + """ Test creating associations based on socket (unconnected, should fail) """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_version_mismatch(cfg): + """ Test creating associations where Rx and Tx PSP versions do not match """ + _init_psp_dev(cfg) + + versions = list(cfg.psp_info['psp-versions-cap']) + if len(versions) < 2: + raise KsftSkipEx("Not enough PSP versions supported by the device for the test") + + # Translate versions to integers + versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions] + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + rx = cfg.pspnl.rx_assoc({"version": versions[0], + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + + for version in versions[1:]: + with ksft_raises(NlError) as cm: + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": rx['rx-key'], + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_twice(cfg): + """ Test reusing Tx assoc for two sockets """ + _init_psp_dev(cfg) + + def rx_assoc_check(s): + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + return assoc + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = rx_assoc_check(s) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(tx), 0) + + # Use the same Tx assoc second time + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2: + rx_assoc_check(s2) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s2.fileno()}) + ksft_eq(len(tx), 0) + + s.close() + + +def _data_basic_send(cfg, version, ipver): + """ Test basic data send """ + _init_psp_dev(cfg) + + # Version 0 is required by spec, don't let it skip + if version: + name = cfg.pspnl.consts["version"].entries_by_val[version].name + if name not in cfg.psp_info['psp-versions-cap']: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + raise KsftSkipEx("PSP version not supported", name) + + s = _make_psp_conn(cfg, version, ipver) + + rx_assoc = cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _close_psp_conn(cfg, s) + + +def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'): + # Make sure we accept the ACK for the SPI before we seal with the bad assoc + _check_data_outq(s, 0) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 20) + _check_data_outq(s, data_len, force_wait=True) + _check_data_rx(cfg, 0) + _close_psp_conn(cfg, s) + + +def data_send_bad_key(cfg): + """ Test send data with bad key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:] + __bad_xfer_do(cfg, s, tx) + + +def data_send_disconnect(cfg): + """ Test socket close after sending data """ + _init_psp_dev(cfg) + + with _make_psp_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + tx = _spi_xchg(s, assoc['rx-key']) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + + s.shutdown(socket.SHUT_RDWR) + s.close() + + +def _data_mss_adjust(cfg, ipver): + _init_psp_dev(cfg) + + # First figure out what the MSS would be without any adjustments + s = _make_clr_conn(cfg, ipver) + s.send(b"0123456789abcdef" * 1024) + _check_data_rx(cfg, 16 * 1024) + mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + _close_conn(cfg, s) + + s = _make_psp_conn(cfg, 0, ipver) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, rxmss) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + finally: + _close_psp_conn(cfg, s) + + +def data_stale_key(cfg): + """ Test send on a double-rotated key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + + s.send(b'0123456789' * 200) + _check_data_outq(s, 2000, force_wait=True) + finally: + _close_psp_conn(cfg, s) + + +def __nsim_psp_rereg(cfg): + # The PSP dev ID will change, remember what was there before + before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + cfg._ns.nsims[0].dfs_write('psp_rereg', '1') + + after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + new_devs = list(after - before) + ksft_eq(len(new_devs), 1) + cfg.psp_dev_id = list(after - before)[0] + + +def removal_device_rx(cfg): + """ Test removing a netdev / PSD with active Rx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_not_none(rx_assoc) + + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def removal_device_bi(cfg): + """ Test removing a netdev / PSD with active Rx/Tx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": rx_assoc['rx-key'], + "sock-fd": s.fileno()}) + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): + """Build test cases for each combo of PSP version and IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" + test_func(cfg, psp_ver, ipver) + return test_case + + +def ipver_test_builder(name, test_func, ipver): + """Build test cases for each IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_ip{ipver}" + test_func(cfg, ipver) + return test_case + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.pspnl = PSPFamily() + + # Set up responder and communication sock + responder = cfg.remote.deploy("psp_responder") + + cfg.comm_port = rand_port() + srv = None + try: + with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote, + exit_wait=True) as srv: + wait_port_listen(cfg.comm_port, host=cfg.remote) + + cfg.comm_sock = socket.create_connection((cfg.remote_addr, + cfg.comm_port), + timeout=1) + + cases = [ + psp_ip_ver_test_builder( + "data_basic_send", _data_basic_send, version, ipver + ) + for version in range(0, 4) + for ipver in ("4", "6") + ] + cases += [ + ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver) + for ipver in ("4", "6") + ] + + ksft_run(cases=cases, globs=globals(), + case_pfx={"dev_", "data_", "assoc_", "removal_"}, + args=(cfg, )) + + cfg.comm_sock.send(b"exit\0") + cfg.comm_sock.close() + finally: + if srv and (srv.stdout or srv.stderr): + ksft_pr("") + ksft_pr(f"Responder logs ({srv.ret}):") + if srv and srv.stdout: + ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# ")) + if srv and srv.stderr: + ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# ")) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c new file mode 100644 index 000000000000..f309e0d73cbf --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <unistd.h> + +#include <ynl.h> + +#include "psp-user.h" + +#define dbg(msg...) \ +do { \ + if (opts->verbose) \ + fprintf(stderr, "DEBUG: " msg); \ +} while (0) + +static bool should_quit; + +struct opts { + int port; + int devid; + bool verbose; +}; + +enum accept_cfg { + ACCEPT_CFG_NONE = 0, + ACCEPT_CFG_CLEAR, + ACCEPT_CFG_PSP, +}; + +static struct { + unsigned char tx; + unsigned char rx; +} psp_vers; + +static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock) +{ + struct psp_rx_assoc_rsp *rsp; + struct psp_rx_assoc_req *req; + struct psp_tx_assoc_rsp *tsp; + struct psp_tx_assoc_req *teq; + char info[300]; + int key_len; + ssize_t sz; + __u32 spi; + + dbg("create PSP connection\n"); + + // Rx assoc alloc + req = psp_rx_assoc_req_alloc(); + + psp_rx_assoc_req_set_sock_fd(req, data_sock); + psp_rx_assoc_req_set_version(req, psp_vers.rx); + + rsp = psp_rx_assoc(ys, req); + psp_rx_assoc_req_free(req); + + if (!rsp) { + perror("ERROR: failed to Rx assoc"); + return -1; + } + + // SPI exchange + key_len = rsp->rx_key._len.key; + memcpy(info, &rsp->rx_key.spi, sizeof(spi)); + memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len); + sz = sizeof(spi) + key_len; + + send(data_sock, info, sz, MSG_WAITALL); + psp_rx_assoc_rsp_free(rsp); + + sz = recv(data_sock, info, sz, MSG_WAITALL); + if (sz < 0) { + perror("ERROR: failed to read PSP key from sock"); + return -1; + } + memcpy(&spi, info, sizeof(spi)); + + // Setup Tx assoc + teq = psp_tx_assoc_req_alloc(); + + psp_tx_assoc_req_set_sock_fd(teq, data_sock); + psp_tx_assoc_req_set_version(teq, psp_vers.tx); + psp_tx_assoc_req_set_tx_key_spi(teq, spi); + psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len); + + tsp = psp_tx_assoc(ys, teq); + psp_tx_assoc_req_free(teq); + if (!tsp) { + perror("ERROR: failed to Tx assoc"); + return -1; + } + psp_tx_assoc_rsp_free(tsp); + + return 0; +} + +static void send_ack(int sock) +{ + send(sock, "ack", 4, MSG_WAITALL); +} + +static void send_err(int sock) +{ + send(sock, "err", 4, MSG_WAITALL); +} + +static void send_str(int sock, int value) +{ + char buf[128]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", value); + send(sock, buf, ret + 1, MSG_WAITALL); +} + +static void +run_session(struct ynl_sock *ys, struct opts *opts, + int server_sock, int comm_sock) +{ + enum accept_cfg accept_cfg = ACCEPT_CFG_NONE; + struct pollfd pfds[3]; + size_t data_read = 0; + int data_sock = -1; + + while (true) { + bool race_close = false; + int nfds; + + memset(pfds, 0, sizeof(pfds)); + + pfds[0].fd = server_sock; + pfds[0].events = POLLIN; + + pfds[1].fd = comm_sock; + pfds[1].events = POLLIN; + + nfds = 2; + if (data_sock >= 0) { + pfds[2].fd = data_sock; + pfds[2].events = POLLIN; + nfds++; + } + + dbg(" ...\n"); + if (poll(pfds, nfds, -1) < 0) { + perror("poll"); + break; + } + + /* data sock */ + if (pfds[2].revents & POLLIN) { + char buf[8192]; + ssize_t n; + + n = recv(data_sock, buf, sizeof(buf), 0); + if (n <= 0) { + if (n < 0) + perror("data read"); + close(data_sock); + data_sock = -1; + dbg("data sock closed\n"); + } else { + data_read += n; + dbg("data read %zd\n", data_read); + } + } + + /* comm sock */ + if (pfds[1].revents & POLLIN) { + static char buf[4096]; + static ssize_t off; + bool consumed; + ssize_t n; + + n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0); + if (n <= 0) { + if (n < 0) + perror("comm read"); + return; + } + + off += n; + n = off; + +#define __consume(sz) \ + ({ \ + if (n == (sz)) { \ + off = 0; \ + } else { \ + off -= (sz); \ + memmove(buf, &buf[(sz)], off); \ + } \ + }) + +#define cmd(_name) \ + ({ \ + ssize_t sz = sizeof(_name); \ + bool match = n >= sz && !memcmp(buf, _name, sz); \ + \ + if (match) { \ + dbg("command: " _name "\n"); \ + __consume(sz); \ + } \ + consumed |= match; \ + match; \ + }) + + do { + consumed = false; + + if (cmd("read len")) + send_str(comm_sock, data_read); + + if (cmd("data echo")) { + if (data_sock >= 0) + send(data_sock, "echo", 5, + MSG_WAITALL); + else + fprintf(stderr, "WARN: echo but no data sock\n"); + send_ack(comm_sock); + } + if (cmd("data close")) { + if (data_sock >= 0) { + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + race_close = true; + } + } + if (cmd("conn psp")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_PSP; + send_ack(comm_sock); + /* next two bytes are versions */ + if (off >= 2) { + memcpy(&psp_vers, buf, 2); + __consume(2); + } else { + fprintf(stderr, "WARN: short conn psp command!\n"); + } + } + if (cmd("conn clr")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_CLEAR; + send_ack(comm_sock); + } + if (cmd("exit")) + should_quit = true; +#undef cmd + + if (!consumed) { + fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n", + off, buf); + } + } while (consumed && off); + } + + /* server sock */ + if (pfds[0].revents & POLLIN) { + if (data_sock >= 0) { + fprintf(stderr, "WARN: new data sock but old one still here\n"); + close(data_sock); + data_sock = -1; + } + data_sock = accept(server_sock, NULL, NULL); + if (data_sock < 0) { + perror("accept"); + continue; + } + data_read = 0; + + if (accept_cfg == ACCEPT_CFG_CLEAR) { + dbg("new data sock: clear\n"); + /* nothing to do */ + } else if (accept_cfg == ACCEPT_CFG_PSP) { + dbg("new data sock: psp\n"); + conn_setup_psp(ys, opts, data_sock); + } else { + fprintf(stderr, "WARN: new data sock but no config\n"); + } + accept_cfg = ACCEPT_CFG_NONE; + } + + if (race_close) { + if (data_sock >= 0) { + /* indeed, ordering problem, handle the close */ + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + fprintf(stderr, "WARN: close but no data sock\n"); + send_err(comm_sock); + } + } + } + dbg("session ending\n"); +} + +static int spawn_server(struct opts *opts) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("can't open socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = htons(opts->port); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { + perror("can't bind socket"); + return -1; + } + + if (listen(fd, 5)) { + perror("can't listen"); + return -1; + } + + return fd; +} + +static int run_responder(struct ynl_sock *ys, struct opts *opts) +{ + int server_sock, comm; + + server_sock = spawn_server(opts); + if (server_sock < 0) + return 4; + + while (!should_quit) { + comm = accept(server_sock, NULL, NULL); + if (comm < 0) { + perror("accept failed"); + } else { + run_session(ys, opts, server_sock, comm); + close(comm); + } + } + + return 0; +} + +static void usage(const char *name, const char *miss) +{ + if (miss) + fprintf(stderr, "Missing argument: %s\n", miss); + + fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name); + exit(EXIT_FAILURE); +} + +static void parse_cmd_opts(int argc, char **argv, struct opts *opts) +{ + int opt; + + while ((opt = getopt(argc, argv, "vp:d:")) != -1) { + switch (opt) { + case 'v': + opts->verbose = 1; + break; + case 'p': + opts->port = atoi(optarg); + break; + case 'd': + opts->devid = atoi(optarg); + break; + default: + usage(argv[0], NULL); + } + } +} + +static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) +{ + struct psp_dev_set_req *sreq; + struct psp_dev_set_rsp *srsp; + + fprintf(stderr, "Set PSP enable on device %d to 0x%x\n", + dev_id, versions); + + sreq = psp_dev_set_req_alloc(); + + psp_dev_set_req_set_id(sreq, dev_id); + psp_dev_set_req_set_psp_versions_ena(sreq, versions); + + srsp = psp_dev_set(ys, sreq); + psp_dev_set_req_free(sreq); + if (!srsp) + return 10; + + psp_dev_set_rsp_free(srsp); + return 0; +} + +int main(int argc, char **argv) +{ + struct psp_dev_get_list *dev_list; + bool devid_found = false; + __u32 ver_ena, ver_cap; + struct opts opts = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int first_id = 0; + int ret; + + parse_cmd_opts(argc, argv, &opts); + if (!opts.port) + usage(argv[0], "port"); // exits + + ys = ynl_sock_create(&ynl_psp_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + dev_list = psp_dev_get_dump(ys); + if (ynl_dump_empty(dev_list)) { + if (ys->err.code) + goto err_close; + fprintf(stderr, "No PSP devices\n"); + goto err_close_silent; + } + + ynl_dump_foreach(dev_list, d) { + if (opts.devid) { + devid_found = true; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else if (!first_id) { + first_id = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else { + fprintf(stderr, "Multiple PSP devices found\n"); + goto err_close_silent; + } + } + psp_dev_get_list_free(dev_list); + + if (opts.devid && !devid_found) { + fprintf(stderr, "PSP device %d requested on cmdline, not found\n", + opts.devid); + goto err_close_silent; + } else if (!opts.devid) { + opts.devid = first_id; + } + + if (ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, opts.devid, ver_cap); + if (ret) + goto err_close; + } + + ret = run_responder(ys, &opts); + + if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena)) + fprintf(stderr, "WARN: failed to set the PSP versions back\n"); + + ynl_sock_destroy(ys); + + return ret; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_close_silent: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index c2bb5d3f1ca1..04d0a2a13e73 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -57,6 +57,36 @@ def check_fec(cfg) -> None: ksft_true(data['stats'], "driver does not report stats") +def check_fec_hist(cfg) -> None: + """ + Check that drivers which support FEC histogram statistics report + reasonable values. + """ + + try: + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + if 'stats' not in data: + raise KsftSkipEx("FEC stats not supported by the device") + if 'hist' not in data['stats']: + raise KsftSkipEx("FEC histogram not supported by the device") + + hist = data['stats']['hist'] + for fec_bin in hist: + for key in ['bin-low', 'bin-high', 'bin-val']: + ksft_in(key, fec_bin, + "Drivers should always report FEC bin range and value") + ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'], + "FEC bin range should be valid") + if 'bin-val-per-lane' in fec_bin: + ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'], + "FEC bin value should be equal to sum of per-plane values") + + def pkt_byte_sum(cfg) -> None: """ Check that qstat and interface stats match in value. @@ -279,8 +309,9 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down, procfs_hammer, procfs_downup_hammer], + ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum, + qstat_by_ifindex, check_down, procfs_hammer, + procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index eaf6938f100e..89d854c7e674 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -TEST_PROGS := dev_addr_lists.sh propagation.sh +TEST_PROGS := dev_addr_lists.sh propagation.sh options.sh TEST_INCLUDES := \ ../bonding/lag_lib.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh + ../../../net/lib.sh \ + ../../../net/in_netns.sh \ + ../../../net/lib/sh/defer.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 636b3525b679..558e1d0cf565 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -3,4 +3,5 @@ CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh new file mode 100755 index 000000000000..44888f32b513 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify basic set and get functionality of the team +# driver options over netlink. + +# Run in private netns. +test_dir="$(dirname "$0")" +if [[ $# -eq 0 ]]; then + "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess + exit $? +fi + +ALL_TESTS=" + team_test_options +" + +source "${test_dir}/../../../net/lib.sh" + +TEAM_PORT="team0" +MEMBER_PORT="dummy0" + +setup() +{ + ip link add name "${MEMBER_PORT}" type dummy + ip link add name "${TEAM_PORT}" type team +} + +get_and_check_value() +{ + local option_name="$1" + local expected_value="$2" + local port_flag="$3" + + local value_from_get + + if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \ + "${port_flag}"); then + echo "Could not get option '${option_name}'" >&2 + return 1 + fi + + if [[ "${value_from_get}" != "${expected_value}" ]]; then + echo "Incorrect value for option '${option_name}'" >&2 + echo "get (${value_from_get}) != set (${expected_value})" >&2 + return 1 + fi +} + +set_and_check_get() +{ + local option_name="$1" + local option_value="$2" + local port_flag="$3" + + local value_from_get + + if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \ + "${option_value}" "${port_flag}"; then + echo "'setoption ${option_name} ${option_value}' failed" >&2 + return 1 + fi + + get_and_check_value "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Get a "port flag" to pass to the `teamnl` command. +# E.g. $1="dummy0" -> "port=dummy0", +# $1="" -> "" +get_port_flag() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + echo "--port=${port_name}" + fi +} + +attach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" master "${TEAM_PORT}" + return $? + fi +} + +detach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" nomaster + return $? + fi +} + +# Test that an option's get value matches its set value. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# value_1 - The first value to try setting. +# value_2 - The second value to try setting. +# port_name - The (optional) name of the attached port. +team_test_option() +{ + local option_name="$1" + local value_1="$2" + local value_2="$3" + local possible_values="$2 $3 $2" + local port_name="$4" + local port_flag + + RET=0 + + echo "Setting '${option_name}' to '${value_1}' and '${value_2}'" + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Set and get both possible values. + for value in ${possible_values}; do + set_and_check_get "${option_name}" "${value}" "${port_flag}" + check_err $? "Failed to set '${option_name}' to '${value}'" + done + + detach_port_if_specified "${port_name}" + check_err $? "Couldn't detach ${port_name} from its master" + + log_test "Set + Get '${option_name}' test" +} + +# Test that getting a non-existant option fails. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# port_name - The (optional) name of the attached port. +team_test_get_option_fails() +{ + local option_name="$1" + local port_name="$2" + local port_flag + + RET=0 + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Just confirm that getting the value fails. + teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}" + check_fail $? "Shouldn't be able to get option '${option_name}'" + + detach_port_if_specified "${port_name}" + + log_test "Get '${option_name}' fails" +} + +team_test_options() +{ + # Wrong option name behavior. + team_test_get_option_fails fake_option1 + team_test_get_option_fails fake_option2 "${MEMBER_PORT}" + + # Correct set and get behavior. + team_test_option mode activebackup loadbalance + team_test_option notify_peers_count 0 5 + team_test_option notify_peers_interval 0 5 + team_test_option mcast_rejoin_count 0 5 + team_test_option mcast_rejoin_interval 0 5 + team_test_option enabled true false "${MEMBER_PORT}" + team_test_option user_linkup true false "${MEMBER_PORT}" + team_test_option user_linkup_enabled true false "${MEMBER_PORT}" + team_test_option priority 10 20 "${MEMBER_PORT}" + team_test_option queue_id 0 1 "${MEMBER_PORT}" +} + +require_command teamnl +setup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 1dd8bf3bf6c9..08fea4230759 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -112,10 +112,10 @@ def _load_xdp_prog(cfg, bpf_info): defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) cmd( - f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", shell=True ) - defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] prog_info["id"] = xdp_info["xdp"]["prog"]["id"] @@ -290,34 +290,78 @@ def test_xdp_native_drop_mb(cfg): _test_drop(cfg, bpf_info, 8000) -def test_xdp_native_tx_mb(cfg): +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): """ - Tests the XDP_TX action for a multi-buff case. + Tests the XDP_TX action. Args: cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. """ cfg.require_cmd("socat", remote=True) - - bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) - test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) - rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" - tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 - with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: - wait_port_listen(port, proto="udp", host=cfg.remote) - cmd(tx_udp, host=cfg.remote, shell=True) + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) - stats = _get_stats(prog_info['maps']['map_xdp_stats']) - ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") - ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) def _validate_res(res, offset_lst, pkt_sz_lst): @@ -644,6 +688,7 @@ def main(): test_xdp_native_pass_mb, test_xdp_native_drop_sb, test_xdp_native_drop_mb, + test_xdp_native_tx_sb, test_xdp_native_tx_mb, test_xdp_native_adjst_tail_grow_data, test_xdp_native_adjst_tail_shrnk_data, diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index fcbdb1297e24..64ac0dfa46b7 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +fclog file_stressor anon_inode_test kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 73d4650af1a5..85427d7f19b9 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c new file mode 100644 index 000000000000..912a8b755c3b --- /dev/null +++ b/tools/testing/selftests/filesystems/fclog.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2025 SUSE LLC. + */ + +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mount.h> + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +FIXTURE(ns) +{ + int host_mntns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); +} + +TEST_F(ns, fscontext_log_enodata) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + /* A brand new fscontext has no log entries. */ + char buf[128] = {}; + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc_after_fsmount) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID); + ASSERT_SUCCESS(mfd); + ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)); + + /* + * The fscontext log should still contain data even after + * FSCONFIG_CMD_CREATE and fsmount(). + */ + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_emsgsize) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + /* + * Attempting to read a message with too small a buffer should not + * result in the message getting consumed. + */ + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0)); + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1)); + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16)); + + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 63ce708d93ed..e4b7c2b457ee 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include <fcntl.h> #include <sched.h> #include <stdio.h> @@ -10,20 +17,12 @@ #include <sys/mount.h> #include <unistd.h> #include <sys/syscall.h> +#include <sys/fanotify.h> #include "../../kselftest_harness.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include <sys/fanotify.h> - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_cmds[] = { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c index 090a5ca65004..9f57ca46e3af 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -2,6 +2,13 @@ // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> #define _GNU_SOURCE + +// Needed for linux/fanotify.h +typedef struct { + int val[2]; +} __kernel_fsid_t; +#define __kernel_fsid_t __kernel_fsid_t + #include <fcntl.h> #include <sched.h> #include <stdio.h> @@ -10,21 +17,12 @@ #include <sys/mount.h> #include <unistd.h> #include <sys/syscall.h> +#include <sys/fanotify.h> #include "../../kselftest_harness.h" -#include "../../pidfd/pidfd.h" #include "../statmount/statmount.h" #include "../utils.h" -// Needed for linux/fanotify.h -#ifndef __kernel_fsid_t -typedef struct { - int val[2]; -} __kernel_fsid_t; -#endif - -#include <sys/fanotify.h> - static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; static const int mark_types[] = { diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 8cfb87f7f7c5..490ace1f017e 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 +PKG_CONFIG ?= pkg-config +LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO") + INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) -CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) +CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1 LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ - ../include/atomic.h \ - ../include/logging.h + ../include/atomic.h TEST_GEN_PROGS := \ futex_wait_timeout \ futex_wait_wouldblock \ diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c index f29e4d627e79..e0a33510ccb6 100644 --- a/tools/testing/selftests/futex/functional/futex_numa.c +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -5,9 +5,10 @@ #include <sys/mman.h> #include <fcntl.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> #include <time.h> #include <assert.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index a9ecfb2d3932..d037a3f10ee8 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -16,9 +16,9 @@ #include <linux/futex.h> #include <sys/mman.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -77,7 +77,7 @@ static void join_max_threads(void) } } -static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags) { int to_wake, ret, i, need_exit = 0; @@ -88,11 +88,17 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag do { ret = futex2_wake(futex_ptr, to_wake, futex_flags); - if (must_fail) { - if (ret < 0) - break; - ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", - to_wake, futex_flags); + + if (err_value) { + if (ret >= 0) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + + if (errno != err_value) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n", + to_wake, futex_flags, err_value, errno, strerror(errno)); + + break; } if (ret < 0) { ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", @@ -106,12 +112,12 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag join_max_threads(); for (i = 0; i < MAX_THREADS; i++) { - if (must_fail && thread_args[i].result != -1) { + if (err_value && thread_args[i].result != -1) { ksft_print_msg("Thread %d should fail but succeeded (%d)\n", i, thread_args[i].result); need_exit = 1; } - if (!must_fail && thread_args[i].result != 0) { + if (!err_value && thread_args[i].result != 0) { ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); need_exit = 1; } @@ -120,58 +126,30 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag ksft_exit_fail_msg("Aborting due to earlier errors.\n"); } -static void test_futex(void *futex_ptr, int must_fail) +static void test_futex(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); } -static void test_futex_mpol(void *futex_ptr, int must_fail) +static void test_futex_mpol(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_numa_mpol) { struct futex32_numa *futex_numa; - int mem_size, i; void *futex_ptr; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); + int mem_size; mem_size = sysconf(_SC_PAGE_SIZE); - futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); if (futex_ptr == MAP_FAILED) ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + /* Create an invalid memory region for the "Memory out of range" test */ + mprotect(futex_ptr + mem_size, mem_size, PROT_NONE); + futex_numa = futex_ptr; ksft_print_msg("Regular test\n"); @@ -182,27 +160,31 @@ int main(int argc, char *argv[]) if (futex_numa->numa == FUTEX_NO_NODE) ksft_exit_fail_msg("NUMA node is left uninitialized\n"); - ksft_print_msg("Memory too small\n"); - test_futex(futex_ptr + mem_size - 4, 1); + /* FUTEX2_NUMA futex must be 8-byte aligned */ + ksft_print_msg("Mis-aligned futex\n"); + test_futex(futex_ptr + mem_size - 4, EINVAL); ksft_print_msg("Memory out of range\n"); - test_futex(futex_ptr + mem_size, 1); + test_futex(futex_ptr + mem_size, EFAULT); futex_numa->numa = FUTEX_NO_NODE; mprotect(futex_ptr, mem_size, PROT_READ); ksft_print_msg("Memory, RO\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_NONE); ksft_print_msg("Memory, no access\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); ksft_print_msg("Memory back to RW\n"); test_futex(futex_ptr, 0); + ksft_test_result_pass("futex2 memory boundary tests passed\n"); + /* MPOL test. Does not work as expected */ - for (i = 0; i < 4; i++) { +#ifdef LIBNUMA_VER_SUFFICIENT + for (int i = 0; i < 4; i++) { unsigned long nodemask; int ret; @@ -221,15 +203,17 @@ int main(int argc, char *argv[]) ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); if (ret < 0) ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); - if (0) - test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", futex_numa->numa, i); } } } - ksft_test_result_pass("NUMA MPOL tests passed\n"); - ksft_finished(); - return 0; + ksft_test_result_pass("futex2 MPOL hints test passed\n"); +#else + ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n"); +#endif + munmap(futex_ptr, mem_size * 2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index aea001ac4946..3b7b5851f290 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -14,7 +14,7 @@ #include <linux/prctl.h> #include <sys/prctl.h> -#include "logging.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -128,46 +128,14 @@ static void futex_dummy_op(void) ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -g Test global hash instead intead local immutable \n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; -int main(int argc, char *argv[]) +TEST(priv_hash) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; int ret, retry = 20; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -189,14 +157,14 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); - /* First thread, has to initialiaze private hash */ + /* First thread, has to initialize private hash */ futex_slots1 = futex_hash_slots_get(); if (futex_slots1 <= 0) { ksft_print_msg("Current hash buckets: %d\n", futex_slots1); - ksft_exit_fail_msg(test_msg_auto_create); + ksft_exit_fail_msg("%s", test_msg_auto_create); } - ksft_test_result_pass(test_msg_auto_create); + ksft_test_result_pass("%s", test_msg_auto_create); online_cpus = sysconf(_SC_NPROCESSORS_ONLN); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); @@ -237,11 +205,11 @@ retry_getslots: } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); - ksft_exit_fail_msg(test_msg_auto_inc); + ksft_exit_fail_msg("%s", test_msg_auto_inc); } - ksft_test_result_pass(test_msg_auto_inc); + ksft_test_result_pass("%s", test_msg_auto_inc); } else { - ksft_test_result_skip(test_msg_auto_inc); + ksft_test_result_skip("%s", test_msg_auto_inc); } ret = pthread_mutex_unlock(&global_lock); @@ -257,17 +225,17 @@ retry_getslots: futex_hash_slots_set_verify(2); join_max_threads(); - ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n", counter, MAX_THREADS); counter = 0; - /* Once the user set something, auto reisze must be disabled */ + /* Once the user set something, auto resize must be disabled */ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); - ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n", ret); futex_hash_slots_set_must_fail(1 << 29); @@ -280,7 +248,7 @@ retry_getslots: ret = futex_hash_slots_set(0); ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) - goto out; + return; futex_hash_slots_set_must_fail(4); futex_hash_slots_set_must_fail(8); @@ -289,17 +257,14 @@ retry_getslots: futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); - if (ret != 0) { + if (ret != 0) ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); - return 1; - } + create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); ksft_test_result(ret == 0, "Continue to use global hash\n"); - -out: - ksft_finished(); - return 0; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c index 51485be6eb2f..69e2555b6039 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue.c +++ b/tools/testing/selftests/futex/functional/futex_requeue.c @@ -7,24 +7,15 @@ #include <pthread.h> #include <limits.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 volatile futex_t *f1; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -38,67 +29,49 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(requeue_single) { - pthread_t waiter[10]; - int res, ret = RET_PASS; - int c, i; volatile futex_t _f1 = 0; volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res; f1 = &_f1; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test futex_requeue\n", - basename(argv[0])); - /* * Requeue a waiter from f1 to f2, and wake f2. */ if (pthread_create(&waiter[0], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Requeuing 1 futex from f1 to f2\n"); + ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0); - if (res != 1) { + if (res != 1) ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; - } - - info("Waking 1 futex at f2\n"); + ksft_print_dbg_msg("Waking 1 futex at f2\n"); res = futex_wake(&f2, 1, 0); if (res != 1) { ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue simple succeeds\n"); } +} + +TEST(requeue_multiple) +{ + volatile futex_t _f1 = 0; + volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res, i; + f1 = &_f1; /* * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7. @@ -106,31 +79,28 @@ int main(int argc, char *argv[]) */ for (i = 0; i < 10; i++) { if (pthread_create(&waiter[i], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); } usleep(WAKE_WAIT_US); - info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); + ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0); if (res != 10) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } - info("Waking INT_MAX futexes at f2\n"); + ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n"); res = futex_wake(&f2, INT_MAX, 0); if (res != 7) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue many succeeds\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c index 215c6cb539b4..f299d75848cd 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -26,11 +26,11 @@ #include <stdlib.h> #include <signal.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi" #define MAX_WAKE_ITERS 1000 #define THREAD_MAX 10 #define SIGNAL_PERIOD_US 100 @@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; futex_t wake_complete = FUTEX_INITIALIZER; -/* Test option defaults */ -static long timeout_ns; -static int broadcast; -static int owner; -static int locked; - struct thread_arg { long id; struct timespec *timeout; @@ -56,18 +50,73 @@ struct thread_arg { }; #define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } -void usage(char *prog) +FIXTURE(args) { - printf("Usage: %s\n", prog); - printf(" -b Broadcast wakeup (all waiters)\n"); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -l Lock the pi futex across requeue\n"); - printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); - printf(" -t N Timeout in nanoseconds (default: 0)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} +}; + +FIXTURE_SETUP(args) +{ +}; + +FIXTURE_TEARDOWN(args) +{ +}; + +FIXTURE_VARIANT(args) +{ + long timeout_ns; + bool broadcast; + bool owner; + bool locked; +}; + +/* + * For a given timeout value, this macro creates a test input with all the + * possible combinations of valid arguments + */ +#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout) \ +{ \ + .timeout_ns = timeout, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .owner = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \ +{ \ + .timeout_ns = timeout, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \ +{ \ + .timeout_ns = timeout, \ + .owner = true, \ +}; \ + +FIXTURE_VARIANT_ADD_TIMEOUT(0); +FIXTURE_VARIANT_ADD_TIMEOUT(5000); +FIXTURE_VARIANT_ADD_TIMEOUT(500000); +FIXTURE_VARIANT_ADD_TIMEOUT(2000000000); int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) @@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); if (ret) { - error("pthread_attr_setinheritsched\n", ret); + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); return -1; } ret = pthread_attr_setschedpolicy(&attr, policy); if (ret) { - error("pthread_attr_setschedpolicy\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); return -1; } schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); if (ret) { - error("pthread_attr_setschedparam\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); return -1; } ret = pthread_create(pth, &attr, func, arg); if (ret) { - error("pthread_create\n", ret); + ksft_exit_fail_msg("pthread_create\n"); return -1; } return 0; @@ -112,7 +161,7 @@ void *waiterfn(void *arg) struct thread_arg *args = (struct thread_arg *)arg; futex_t old_val; - info("Waiter %ld: running\n", args->id); + ksft_print_dbg_msg("Waiter %ld: running\n", args->id); /* Each thread sleeps for a different amount of time * This is to avoid races, because we don't lock the * external mutex here */ @@ -120,26 +169,25 @@ void *waiterfn(void *arg) old_val = f1; atomic_inc(&waiters_blocked); - info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", &f1, f1, &f2); args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, FUTEX_PRIVATE_FLAG); - info("waiter %ld woke with %d %s\n", args->id, args->ret, + ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret, args->ret < 0 ? strerror(errno) : ""); atomic_inc(&waiters_woken); if (args->ret < 0) { if (args->timeout && errno == ETIMEDOUT) args->ret = 0; else { - args->ret = RET_ERROR; - error("futex_wait_requeue_pi\n", errno); + ksft_exit_fail_msg("futex_wait_requeue_pi\n"); } futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("Waiter %ld: exiting with %d\n", args->id, args->ret); + ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret); pthread_exit((void *)&args->ret); } @@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); - info("Waker: Calling broadcast\n"); + ksft_print_dbg_msg("Waker: Calling broadcast\n"); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } continue_requeue: @@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg) args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, FUTEX_PRIVATE_FLAG); if (args->ret < 0) { - args->ret = RET_ERROR; - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } else if (++i < MAX_WAKE_ITERS) { task_count += args->ret; if (task_count < THREAD_MAX - waiters_woken.val) goto continue_requeue; } else { - error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", - 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); - args->ret = RET_ERROR; + ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + MAX_WAKE_ITERS, task_count, THREAD_MAX); } futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); @@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg) if (args->ret > 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); pthread_exit((void *)&args->ret); } @@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { - info("task_count: %d, waiters_woken: %d\n", + ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n", task_count, waiters_woken.val); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } - info("Waker: Calling signal\n"); + ksft_print_dbg_msg("Waker: Calling signal\n"); /* cond_signal */ old_val = f1; args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, @@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg) FUTEX_PRIVATE_FLAG); if (args->ret < 0) args->ret = -errno; - info("futex: %x\n", f2); + ksft_print_dbg_msg("futex: %x\n", f2); if (args->lock) { - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); } - info("futex: %x\n", f2); - if (args->ret < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - args->ret = RET_ERROR; - break; - } + ksft_print_dbg_msg("futex: %x\n", f2); + if (args->ret < 0) + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); task_count += args->ret; usleep(SIGNAL_PERIOD_US); i++; /* we have to loop at least THREAD_MAX times */ if (i > MAX_WAKE_ITERS + THREAD_MAX) { - error("max signaling iterations (%d) reached, giving up on pending waiters.\n", - 0, MAX_WAKE_ITERS + THREAD_MAX); - args->ret = RET_ERROR; - break; + ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n", + MAX_WAKE_ITERS + THREAD_MAX); } } @@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg) if (args->ret >= 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); - info("Waker: waiters_woken: %d\n", waiters_woken.val); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val); pthread_exit((void *)&args->ret); } @@ -269,35 +310,40 @@ void *third_party_blocker(void *arg) ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); out: - if (args->ret || ret2) { - error("third_party_blocker() futex error", 0); - args->ret = RET_ERROR; - } + if (args->ret || ret2) + ksft_exit_fail_msg("third_party_blocker() futex error"); pthread_exit((void *)&args->ret); } -int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +TEST_F(args, futex_requeue_pi) { - void *(*wakerfn)(void *) = signal_wakerfn; struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; pthread_t waiter[THREAD_MAX], waker, blocker; - struct timespec ts, *tsp = NULL; + void *(*wakerfn)(void *) = signal_wakerfn; + bool third_party_owner = variant->owner; + long timeout_ns = variant->timeout_ns; + bool broadcast = variant->broadcast; struct thread_arg args[THREAD_MAX]; - int *waiter_ret; - int i, ret = RET_PASS; + struct timespec ts, *tsp = NULL; + bool lock = variant->locked; + int *waiter_ret, i, ret = 0; + + ksft_print_msg( + "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, lock, third_party_owner, timeout_ns); if (timeout_ns) { time_t secs; - info("timeout_ns = %ld\n", timeout_ns); + ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns); ret = clock_gettime(CLOCK_MONOTONIC, &ts); secs = (ts.tv_nsec + timeout_ns) / 1000000000; ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; ts.tv_sec += secs; - info("ts.tv_sec = %ld\n", ts.tv_sec); - info("ts.tv_nsec = %ld\n", ts.tv_nsec); + ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec); + ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec); tsp = &ts; } @@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) if (third_party_owner) { if (create_rt_thread(&blocker, third_party_blocker, (void *)&blocker_arg, SCHED_FIFO, 1)) { - error("Creating third party blocker thread failed\n", - errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating third party blocker thread failed\n"); } } @@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) for (i = 0; i < THREAD_MAX; i++) { args[i].id = i; args[i].timeout = tsp; - info("Starting thread %d\n", i); + ksft_print_dbg_msg("Starting thread %d\n", i); if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], SCHED_FIFO, 1)) { - error("Creating waiting thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waiting thread failed\n"); } } waker_arg.lock = lock; if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, SCHED_FIFO, 1)) { - error("Creating waker thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waker thread failed\n"); } /* Wait for threads to finish */ @@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) pthread_join(blocker, NULL); pthread_join(waker, NULL); -out: if (!ret) { if (*waiter_ret) ret = *waiter_ret; @@ -355,66 +393,8 @@ out: ret = blocker_arg.ret; } - return ret; + if (ret) + ksft_test_result_fail("fail"); } -int main(int argc, char *argv[]) -{ - char *test_name; - int c, ret; - - while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { - switch (c) { - case 'b': - broadcast = 1; - break; - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'l': - locked = 1; - break; - case 'o': - owner = 1; - locked = 0; - break; - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0])); - ksft_print_msg( - "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", - broadcast, locked, owner, timeout_ns); - - ret = asprintf(&test_name, - "%s broadcast=%d locked=%d owner=%d timeout=%ldns", - TEST_NAME, broadcast, locked, owner, timeout_ns); - if (ret < 0) { - ksft_print_msg("Failed to generate test name\n"); - test_name = TEST_NAME; - } - - /* - * FIXME: unit_test is obsolete now that we parse options and the - * various style of runs are done by run.sh - simplify the code and move - * unit_test into main() - */ - ret = unit_test(broadcast, locked, owner, timeout_ns); - - print_result(test_name, ret); - return ret; -} +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c index d0a4d332ea44..77135a22a583 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -23,67 +23,32 @@ #include <stdlib.h> #include <string.h> #include <time.h> -#include "futextest.h" -#include "logging.h" -#define TEST_NAME "futex-requeue-pi-mismatched-ops" +#include "futextest.h" +#include "../../kselftest_harness.h" futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; int child_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *blocking_child(void *arg) { child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); if (child_ret < 0) { child_ret = -errno; - error("futex_wait\n", errno); + ksft_exit_fail_msg("futex_wait\n"); } return (void *)&child_ret; } -int main(int argc, char *argv[]) +TEST(requeue_pi_mismatched_ops) { - int ret = RET_PASS; pthread_t child; - int c; + int ret; - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Detect mismatched requeue_pi operations\n", - basename(argv[0])); + if (pthread_create(&child, NULL, blocking_child, NULL)) + ksft_exit_fail_msg("pthread_create\n"); - if (pthread_create(&child, NULL, blocking_child, NULL)) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } /* Allow the child to block in the kernel. */ sleep(1); @@ -102,34 +67,27 @@ int main(int argc, char *argv[]) * FUTEX_WAKE. */ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); - if (ret == 1) { - ret = RET_PASS; - } else if (ret < 0) { - error("futex_wake\n", errno); - ret = RET_ERROR; - } else { - error("futex_wake did not wake the child\n", 0); - ret = RET_ERROR; - } + if (ret == 1) + ret = 0; + else if (ret < 0) + ksft_exit_fail_msg("futex_wake\n"); + else + ksft_exit_fail_msg("futex_wake did not wake the child\n"); } else { - error("futex_cmp_requeue_pi\n", errno); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi\n"); } } else if (ret > 0) { - fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); - ret = RET_FAIL; + ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); } else { - error("futex_cmp_requeue_pi found no waiters\n", 0); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n"); } pthread_join(child, NULL); - if (!ret) - ret = child_ret; - - out: - /* If the kernel crashes, we shouldn't return at all. */ - print_result(TEST_NAME, ret); - return ret; + if (!ret && !child_ret) + ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n"); + else + ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index c6b8f32990c8..e34ee0f9ebcc 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -24,11 +24,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi-signal-restart" #define DELAY_US 100 futex_t f1 = FUTEX_INITIALIZER; @@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER; int waiter_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) { @@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, memset(&schedp, 0, sizeof(schedp)); ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); - if (ret) { - error("pthread_attr_setinheritsched\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); ret = pthread_attr_setschedpolicy(&attr, policy); - if (ret) { - error("pthread_attr_setschedpolicy\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); - if (ret) { - error("pthread_attr_setschedparam\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); ret = pthread_create(pth, &attr, func, arg); - if (ret) { - error("pthread_create\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); + return 0; } void handle_signal(int signo) { - info("signal received %s requeue\n", + ksft_print_dbg_msg("signal received %s requeue\n", requeued.val ? "after" : "prior to"); } @@ -94,78 +78,46 @@ void *waiterfn(void *arg) unsigned int old_val; int res; - waiter_ret = RET_PASS; - - info("Waiter running\n"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Waiter running\n"); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); old_val = f1; res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, FUTEX_PRIVATE_FLAG); if (!requeued.val || errno != EWOULDBLOCK) { - fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", res, strerror(errno)); - info("w2:futex: %x\n", f2); + ksft_print_dbg_msg("w2:futex: %x\n", f2); if (!res) futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - waiter_ret = RET_FAIL; } - info("Waiter exiting with %d\n", waiter_ret); pthread_exit(NULL); } -int main(int argc, char *argv[]) +TEST(futex_requeue_pi_signal_restart) { unsigned int old_val; struct sigaction sa; pthread_t waiter; - int c, res, ret = RET_PASS; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test signal handling during requeue_pi\n", - basename(argv[0])); - ksft_print_msg("\tArguments: <none>\n"); + int res; sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; - if (sigaction(SIGUSR1, &sa, NULL)) { - error("sigaction\n", errno); - exit(1); - } + if (sigaction(SIGUSR1, &sa, NULL)) + ksft_exit_fail_msg("sigaction\n"); - info("m1:f2: %x\n", f2); - info("Creating waiter\n"); + ksft_print_dbg_msg("m1:f2: %x\n", f2); + ksft_print_dbg_msg("Creating waiter\n"); res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); - if (res) { - error("Creating waiting thread failed", res); - ret = RET_ERROR; - goto out; - } + if (res) + ksft_exit_fail_msg("Creating waiting thread failed"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); - info("m2:f2: %x\n", f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("m2:f2: %x\n", f2); futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); - info("m3:f2: %x\n", f2); + ksft_print_dbg_msg("m3:f2: %x\n", f2); while (1) { /* @@ -173,11 +125,11 @@ int main(int argc, char *argv[]) * restart futex_wait_requeue_pi() in the kernel. Wait for the * waiter to block on f1 again. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); usleep(DELAY_US); - info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); old_val = f1; res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, FUTEX_PRIVATE_FLAG); @@ -191,12 +143,10 @@ int main(int argc, char *argv[]) atomic_set(&requeued, 1); break; } else if (res < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - ret = RET_ERROR; - break; + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } } - info("m4:f2: %x\n", f2); + ksft_print_dbg_msg("m4:f2: %x\n", f2); /* * Signal the waiter after requeue, waiter should return from @@ -204,19 +154,14 @@ int main(int argc, char *argv[]) * futex_unlock_pi() can't happen before the signal wakeup is detected * in the kernel. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); - info("Waiting for waiter to return\n"); + ksft_print_dbg_msg("Waiting for waiter to return\n"); pthread_join(waiter, NULL); - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("m5:f2: %x\n", f2); - - out: - if (ret == RET_PASS && waiter_ret) - ret = waiter_ret; - - print_result(TEST_NAME, ret); - return ret; + ksft_print_dbg_msg("m5:f2: %x\n", f2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c index 685140d9b93d..152ca4612886 100644 --- a/tools/testing/selftests/futex/functional/futex_wait.c +++ b/tools/testing/selftests/futex/functional/futex_wait.c @@ -9,25 +9,16 @@ #include <sys/shm.h> #include <sys/mman.h> #include <fcntl.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 #define SHM_PATH "futex_shm_file" void *futex; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static void *waiterfn(void *arg) { struct timespec to; @@ -45,53 +36,37 @@ static void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_futex) { - int res, ret = RET_PASS, fd, c, shm_id; - u_int32_t f_private = 0, *shared_data; unsigned int flags = FUTEX_PRIVATE_FLAG; + u_int32_t f_private = 0; pthread_t waiter; - void *shm; + int res; futex = &f_private; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(3); - ksft_print_msg("%s: Test futex_wait\n", basename(argv[0])); - /* Testing a private futex */ - info("Calling private futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling private futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG); if (res != 1) { ksft_test_result_fail("futex_wake private returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake private succeeds\n"); } +} + +TEST(anon_page) +{ + u_int32_t *shared_data; + pthread_t waiter; + int res, shm_id; /* Testing an anon page shared memory */ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); @@ -105,67 +80,65 @@ int main(int argc, char *argv[]) *shared_data = 0; futex = shared_data; - info("Calling shared (page anon) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (page anon) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (page anon) succeeds\n"); } + shmdt(shared_data); +} + +TEST(file_backed) +{ + u_int32_t f_private = 0; + pthread_t waiter; + int res, fd; + void *shm; /* Testing a file backed shared memory */ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - if (fd < 0) { - perror("open"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("open"); - if (ftruncate(fd, sizeof(f_private))) { - perror("ftruncate"); - exit(1); - } + if (ftruncate(fd, sizeof(f_private))) + ksft_exit_fail_msg("ftruncate"); shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (shm == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (shm == MAP_FAILED) + ksft_exit_fail_msg("mmap"); memcpy(shm, &f_private, sizeof(f_private)); futex = shm; - info("Calling shared (file backed) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (file backed) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex); res = futex_wake(shm, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (file backed) succeeds\n"); } - /* Freeing resources */ - shmdt(shared_data); munmap(shm, sizeof(f_private)); remove(SHM_PATH); close(fd); - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c index fb4148f23fa3..8952ebda14ab 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -27,10 +27,9 @@ #include <libgen.h> #include <signal.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-private-mapped-file" #define PAGE_SZ 4096 char pad[PAGE_SZ] = {1}; @@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1}; #define WAKE_WAIT_US 3000000 struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *thr_futex_wait(void *arg) { int ret; - info("futex wait\n"); + ksft_print_dbg_msg("futex wait\n"); ret = futex_wait(&val, 1, &wait_timeout, 0); - if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { - error("futex error.\n", errno); - print_result(TEST_NAME, RET_ERROR); - exit(RET_ERROR); - } + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) + ksft_exit_fail_msg("futex error.\n"); if (ret && errno == ETIMEDOUT) - fail("waiter timedout\n"); + ksft_exit_fail_msg("waiter timedout\n"); - info("futex_wait: ret = %d, errno = %d\n", ret, errno); + ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno); return NULL; } -int main(int argc, char **argv) +TEST(wait_private_mapped_file) { pthread_t thr; - int ret = RET_PASS; int res; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg( - "%s: Test the futex value of private file mappings in FUTEX_WAIT\n", - basename(argv[0])); - - ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); - if (ret < 0) { - fprintf(stderr, "pthread_create error\n"); - ret = RET_ERROR; - goto out; - } - - info("wait a while\n"); + + res = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (res < 0) + ksft_exit_fail_msg("pthread_create error\n"); + + ksft_print_dbg_msg("wait a while\n"); usleep(WAKE_WAIT_US); val = 2; res = futex_wake(&val, 1, 0); - info("futex_wake %d\n", res); - if (res != 1) { - fail("FUTEX_WAKE didn't find the waiting thread.\n"); - ret = RET_FAIL; - } + ksft_print_dbg_msg("futex_wake %d\n", res); + if (res != 1) + ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n"); - info("join\n"); + ksft_print_dbg_msg("join\n"); pthread_join(thr, NULL); - out: - print_result(TEST_NAME, ret); - return ret; + ksft_test_result_pass("wait_private_mapped_file"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index d183f878360b..0c8766aced2e 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -16,26 +16,15 @@ *****************************************************************************/ #include <pthread.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" - -#define TEST_NAME "futex-wait-timeout" +#include "../../kselftest_harness.h" static long timeout_ns = 100000; /* 100us default timeout */ static futex_t futex_pi; static pthread_barrier_t barrier; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - /* * Get a PI lock and hold it forever, so the main thread lock_pi will block * and we can test the timeout @@ -47,13 +36,13 @@ void *get_pi_lock(void *arg) ret = futex_lock_pi(&futex_pi, NULL, 0, 0); if (ret != 0) - error("futex_lock_pi failed\n", ret); + ksft_exit_fail_msg("futex_lock_pi failed\n"); pthread_barrier_wait(&barrier); /* Blocks forever */ ret = futex_wait(&lock, 0, NULL, 0); - error("futex_wait failed\n", ret); + ksft_exit_fail_msg("futex_wait failed\n"); return NULL; } @@ -61,12 +50,11 @@ void *get_pi_lock(void *arg) /* * Check if the function returned the expected error */ -static void test_timeout(int res, int *ret, char *test_name, int err) +static void test_timeout(int res, char *test_name, int err) { if (!res || errno != err) { ksft_test_result_fail("%s returned %d\n", test_name, res < 0 ? errno : res); - *ret = RET_FAIL; } else { ksft_test_result_pass("%s succeeds\n", test_name); } @@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err) static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, long timeout_ns) { - if (clock_gettime(clockid, to)) { - error("clock_gettime failed\n", errno); - return errno; - } + if (clock_gettime(clockid, to)) + ksft_exit_fail_msg("clock_gettime failed\n"); to->tv_nsec += timeout_ns; @@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, return 0; } -int main(int argc, char *argv[]) +TEST(wait_bitset) { futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; struct timespec to; - pthread_t thread; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1, - .flags = FUTEX_32, - .__reserved = 0 - }; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(9); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); - - pthread_barrier_init(&barrier, NULL, 2); - pthread_create(&thread, NULL, get_pi_lock, NULL); + int res; /* initialize relative timeout */ to.tv_sec = 0; to.tv_nsec = timeout_ns; res = futex_wait(&f1, f1, &to, 0); - test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT); + test_timeout(res, "futex_wait relative", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, 0); - test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT); +} + +TEST(requeue_pi) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res; /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT); /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0); - test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + +} + +TEST(lock_pi) +{ + struct timespec to; + pthread_t thread; + int res; + + /* Create a thread that will lock forever so any waiter will timeout */ + pthread_barrier_init(&barrier, NULL, 2); + pthread_create(&thread, NULL, get_pi_lock, NULL); /* Wait until the other thread calls futex_lock_pi() */ pthread_barrier_wait(&barrier); pthread_barrier_destroy(&barrier); + /* * FUTEX_LOCK_PI with CLOCK_REALTIME * Due to historical reasons, FUTEX_LOCK_PI supports only realtime @@ -181,26 +150,38 @@ int main(int argc, char *argv[]) * smaller than realtime and the syscall will timeout immediately. */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_lock_pi(&futex_pi, &to, 0, 0); - test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT); /* Test operations that don't support FUTEX_CLOCK_REALTIME */ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); + test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS); +} + +TEST(waitv) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1, + .flags = FUTEX_32, + .__reserved = 0, + }; + struct timespec to; + int res; /* futex_waitv with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); - test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT); + test_timeout(res, "futex_waitv monotonic", ETIMEDOUT); /* futex_waitv with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME); - test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT); - - ksft_print_cnts(); - return ret; + test_timeout(res, "futex_waitv realtime", ETIMEDOUT); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c index ed9cd07e31c1..ce2301500d83 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -29,95 +29,55 @@ #include <linux/futex.h> #include <libgen.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-uninitialized-heap" #define WAIT_US 5000000 static int child_blocked = 1; -static int child_ret; +static bool child_ret; void *buf; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *wait_thread(void *arg) { int res; - child_ret = RET_PASS; + child_ret = true; res = futex_wait(buf, 1, NULL, 0); child_blocked = 0; if (res != 0 && errno != EWOULDBLOCK) { - error("futex failure\n", errno); - child_ret = RET_ERROR; + ksft_exit_fail_msg("futex failure\n"); + child_ret = false; } pthread_exit(NULL); } -int main(int argc, char **argv) +TEST(futex_wait_uninitialized_heap) { - int c, ret = RET_PASS; long page_size; pthread_t thr; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } + int ret; page_size = sysconf(_SC_PAGESIZE); buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); - if (buf == (void *)-1) { - error("mmap\n", errno); - exit(1); - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n", - basename(argv[0])); - + if (buf == (void *)-1) + ksft_exit_fail_msg("mmap\n"); ret = pthread_create(&thr, NULL, wait_thread, NULL); - if (ret) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); - info("waiting %dus for child to return\n", WAIT_US); + ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US); usleep(WAIT_US); - ret = child_ret; - if (child_blocked) { - fail("child blocked in kernel\n"); - ret = RET_FAIL; - } + if (child_blocked) + ksft_test_result_fail("child blocked in kernel\n"); - out: - print_result(TEST_NAME, ret); - return ret; + if (!child_ret) + ksft_test_result_fail("child error\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 2d8230da9064..36b7a54a4085 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -21,72 +21,44 @@ #include <stdlib.h> #include <string.h> #include <time.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-wouldblock" #define timeout_ns 100000 -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_wait_wouldblock) { struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1+1, - .flags = FUTEX_32, - .__reserved = 0 - }; + int res; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - - info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_wait returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wait\n"); } +} - if (clock_gettime(CLOCK_MONOTONIC, &to)) { - error("clock_gettime failed\n", errno); - return errno; - } +TEST(futex_waitv_wouldblock) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1 + 1, + .flags = FUTEX_32, + .__reserved = 0, + }; + int res; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + ksft_exit_fail_msg("clock_gettime failed %d\n", errno); to.tv_nsec += timeout_ns; @@ -95,17 +67,15 @@ int main(int argc, char *argv[]) to.tv_nsec -= 1000000000; } - info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c index a94337f677e1..c684b10eb76e 100644 --- a/tools/testing/selftests/futex/functional/futex_waitv.c +++ b/tools/testing/selftests/futex/functional/futex_waitv.c @@ -15,25 +15,16 @@ #include <pthread.h> #include <stdint.h> #include <sys/shm.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define WAKE_WAIT_US 10000 #define NR_FUTEXES 30 static struct futex_waitv waitv[NR_FUTEXES]; u_int32_t futexes[NR_FUTEXES] = {0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -41,7 +32,7 @@ void *waiterfn(void *arg) /* setting absolute timeout for futex2 */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -57,34 +48,10 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_waitv) { pthread_t waiter; - int res, ret = RET_PASS; - struct timespec to; - int c, i; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(7); - ksft_print_msg("%s: Test FUTEX_WAITV\n", - basename(argv[0])); + int res, i; for (i = 0; i < NR_FUTEXES; i++) { waitv[i].uaddr = (uintptr_t)&futexes[i]; @@ -95,7 +62,7 @@ int main(int argc, char *argv[]) /* Private waitv */ if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -104,10 +71,15 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv private\n"); } +} + +TEST(shared_waitv) +{ + pthread_t waiter; + int res, i; /* Shared waitv */ for (i = 0; i < NR_FUTEXES; i++) { @@ -128,7 +100,7 @@ int main(int argc, char *argv[]) } if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -137,19 +109,24 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake shared returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv shared\n"); } for (i = 0; i < NR_FUTEXES; i++) shmdt(u64_to_ptr(waitv[i].uaddr)); +} + +TEST(invalid_flag) +{ + struct timespec to; + int res; /* Testing a waiter without FUTEX_32 flag */ waitv[0].flags = FUTEX_PRIVATE_FLAG; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -158,17 +135,22 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv without FUTEX_32\n"); } +} + +TEST(unaligned_address) +{ + struct timespec to; + int res; /* Testing a waiter with an unaligned address */ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32; waitv[0].uaddr = 1; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -177,16 +159,21 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv with an unaligned address\n"); } +} + +TEST(null_address) +{ + struct timespec to; + int res; /* Testing a NULL address for waiters.uaddr */ waitv[0].uaddr = 0x00000000; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -195,14 +182,13 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n"); } /* Testing a NULL address for *waiters */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -211,14 +197,19 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in *waiters\n"); } +} + +TEST(invalid_clockid) +{ + struct timespec to; + int res; /* Testing an invalid clockid */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -227,11 +218,9 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv invalid clockid\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 81739849f299..e88545c06d57 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -18,74 +18,36 @@ # ############################################################################### -# Test for a color capable console -if [ -z "$USE_COLOR" ]; then - tput setf 7 || tput setaf 7 - if [ $? -eq 0 ]; then - USE_COLOR=1 - tput sgr0 - fi -fi -if [ "$USE_COLOR" -eq 1 ]; then - COLOR="-c" -fi - - echo -# requeue pi testing -# without timeouts -./futex_requeue_pi $COLOR -./futex_requeue_pi $COLOR -b -./futex_requeue_pi $COLOR -b -l -./futex_requeue_pi $COLOR -b -o -./futex_requeue_pi $COLOR -l -./futex_requeue_pi $COLOR -o -# with timeouts -./futex_requeue_pi $COLOR -b -l -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -l -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -./futex_requeue_pi $COLOR -b -t 5000 -./futex_requeue_pi $COLOR -t 5000 -./futex_requeue_pi $COLOR -b -t 500000 -./futex_requeue_pi $COLOR -t 500000 -./futex_requeue_pi $COLOR -b -o -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -o -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -# with long timeout -./futex_requeue_pi $COLOR -b -l -t 2000000000 -./futex_requeue_pi $COLOR -l -t 2000000000 - +./futex_requeue_pi echo -./futex_requeue_pi_mismatched_ops $COLOR +./futex_requeue_pi_mismatched_ops echo -./futex_requeue_pi_signal_restart $COLOR +./futex_requeue_pi_signal_restart echo -./futex_wait_timeout $COLOR +./futex_wait_timeout echo -./futex_wait_wouldblock $COLOR +./futex_wait_wouldblock echo -./futex_wait_uninitialized_heap $COLOR -./futex_wait_private_mapped_file $COLOR +./futex_wait_uninitialized_heap +./futex_wait_private_mapped_file echo -./futex_wait $COLOR +./futex_wait echo -./futex_requeue $COLOR +./futex_requeue echo -./futex_waitv $COLOR +./futex_waitv echo -./futex_priv_hash $COLOR -./futex_priv_hash -g $COLOR +./futex_priv_hash echo -./futex_numa_mpol $COLOR +./futex_numa_mpol diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index 7a5fd1d5355e..3d48e9789d9f 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -58,6 +58,17 @@ typedef volatile u_int32_t futex_t; #define SYS_futex SYS_futex_time64 #endif +/* + * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if + * we are using a newer compiler then the size of the timestamps will be 64bit, + * however, the SYS_futex will still point to the 32bit futex system call. + */ +#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \ + defined(_TIME_BITS) && _TIME_BITS == 64 +# undef SYS_futex +# define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h deleted file mode 100644 index 874c69ce5cce..000000000000 --- a/tools/testing/selftests/futex/include/logging.h +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************************** - * - * Copyright © International Business Machines Corp., 2009 - * - * DESCRIPTION - * Glibc independent futex library for testing kernel functionality. - * - * AUTHOR - * Darren Hart <dvhart@linux.intel.com> - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> - * - *****************************************************************************/ - -#ifndef _LOGGING_H -#define _LOGGING_H - -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <linux/futex.h> -#include "kselftest.h" - -/* - * Define PASS, ERROR, and FAIL strings with and without color escape - * sequences, default to no color. - */ -#define ESC 0x1B, '[' -#define BRIGHT '1' -#define GREEN '3', '2' -#define YELLOW '3', '3' -#define RED '3', '1' -#define ESCEND 'm' -#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND -#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND -#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND -#define RESET_COLOR ESC, '0', 'm' -static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', - RESET_COLOR, 0}; -static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', - RESET_COLOR, 0}; -static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', - RESET_COLOR, 0}; -static const char INFO_NORMAL[] = " INFO"; -static const char PASS_NORMAL[] = " PASS"; -static const char ERROR_NORMAL[] = "ERROR"; -static const char FAIL_NORMAL[] = " FAIL"; -const char *INFO = INFO_NORMAL; -const char *PASS = PASS_NORMAL; -const char *ERROR = ERROR_NORMAL; -const char *FAIL = FAIL_NORMAL; - -/* Verbosity setting for INFO messages */ -#define VQUIET 0 -#define VCRITICAL 1 -#define VINFO 2 -#define VMAX VINFO -int _verbose = VCRITICAL; - -/* Functional test return codes */ -#define RET_PASS 0 -#define RET_ERROR -1 -#define RET_FAIL -2 - -/** - * log_color() - Use colored output for PASS, ERROR, and FAIL strings - * @use_color: use color (1) or not (0) - */ -void log_color(int use_color) -{ - if (use_color) { - PASS = PASS_COLOR; - ERROR = ERROR_COLOR; - FAIL = FAIL_COLOR; - } else { - PASS = PASS_NORMAL; - ERROR = ERROR_NORMAL; - FAIL = FAIL_NORMAL; - } -} - -/** - * log_verbosity() - Set verbosity of test output - * @verbose: Enable (1) verbose output or not (0) - * - * Currently setting verbose=1 will enable INFO messages and 0 will disable - * them. FAIL and ERROR messages are always displayed. - */ -void log_verbosity(int level) -{ - if (level > VMAX) - level = VMAX; - else if (level < 0) - level = 0; - _verbose = level; -} - -/** - * print_result() - Print standard PASS | ERROR | FAIL results - * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL - * - * print_result() is primarily intended for functional tests. - */ -void print_result(const char *test_name, int ret) -{ - switch (ret) { - case RET_PASS: - ksft_test_result_pass("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_ERROR: - ksft_test_result_error("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_FAIL: - ksft_test_result_fail("%s\n", test_name); - ksft_print_cnts(); - return; - } -} - -/* log level macros */ -#define info(message, vargs...) \ -do { \ - if (_verbose >= VINFO) \ - fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ -} while (0) - -#define error(message, err, args...) \ -do { \ - if (_verbose >= VCRITICAL) {\ - if (err) \ - fprintf(stderr, "\t%s: %s: "message, \ - ERROR, strerror(err), ##args); \ - else \ - fprintf(stderr, "\t%s: "message, ERROR, ##args); \ - } \ -} while (0) - -#define fail(message, args...) \ -do { \ - if (_verbose >= VCRITICAL) \ - fprintf(stderr, "\t%s: "message, FAIL, ##args); \ -} while (0) - -#endif diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 651fc9f13c08..45c14323a618 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -113,7 +113,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata, * necessarily mean a test failure, just that the limit has to be made * bigger. */ - ASSERT_GT(400, nth_state->iteration); + ASSERT_GT(1000, nth_state->iteration); if (nth_state->iteration != 0) { ssize_t res; ssize_t res2; diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore new file mode 100644 index 000000000000..5f3d9e089ae8 --- /dev/null +++ b/tools/testing/selftests/kexec/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_kexec_jump diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c index 8034e24c6bf6..6d9e91d55d68 100644 --- a/tools/testing/selftests/kho/init.c +++ b/tools/testing/selftests/kho/init.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef NOLIBC -#include <errno.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> -#include <syscall.h> +#include <sys/syscall.h> #include <sys/mount.h> #include <sys/reboot.h> -#endif +#include <linux/kexec.h> /* from arch/x86/include/asm/setup.h */ #define COMMAND_LINE_SIZE 2048 -/* from include/linux/kexex.h */ -#define KEXEC_FILE_NO_INITRAMFS 0x00000004 - -#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KHO_FINALIZE "/debugfs/kho/out/finalize" #define KERNEL_IMAGE "/kernel" static int mount_filesystems(void) @@ -32,7 +27,7 @@ static int kho_enable(void) const char enable[] = "1"; int fd; - fd = open(KHO_FINILIZE, O_RDWR); + fd = open(KHO_FINALIZE, O_RDWR); if (fd < 0) return -1; diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh index ec70a17bd476..3f6c17166846 100755 --- a/tools/testing/selftests/kho/vmtest.sh +++ b/tools/testing/selftests/kho/vmtest.sh @@ -10,7 +10,6 @@ kernel_dir=$(realpath "$test_dir/../../../..") tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) headers_dir="$tmp_dir/usr" -initrd_dir="$tmp_dir/initrd" initrd="$tmp_dir/initrd.cpio" source "$test_dir/../kselftest/ktap_helpers.sh" @@ -81,19 +80,22 @@ EOF function mkinitrd() { local kernel=$1 - mkdir -p "$initrd_dir"/{dev,debugfs,proc} - sudo mknod "$initrd_dir/dev/console" c 5 1 - - "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ - -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ - -include "$test_dir/../../../include/nolibc/nolibc.h" \ - -o "$initrd_dir/init" "$test_dir/init.c" \ - - cp "$kernel" "$initrd_dir/kernel" + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \ + -fno-asynchronous-unwind-tables -fno-ident \ + -I "$headers_dir/include" \ + -I "$kernel_dir/tools/include/nolibc" \ + -o "$tmp_dir/init" "$test_dir/init.c" + + cat > "$tmp_dir/cpio_list" <<EOF +dir /dev 0755 0 0 +dir /proc 0755 0 0 +dir /debugfs 0755 0 0 +nod /dev/console 0600 0 0 c 5 1 +file /init $tmp_dir/init 0755 0 0 +file /kernel $kernel 0644 0 0 +EOF - pushd "$initrd_dir" &>/dev/null - find . | cpio -H newc --create > "$initrd" 2>/dev/null - popd &>/dev/null + "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd" } function run_qemu() { diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index c3b6d2604b1e..afbcf8412ae5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -54,6 +54,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <stdbool.h> #include <string.h> #include <stdio.h> #include <sys/utsname.h> @@ -92,6 +93,14 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -104,6 +113,7 @@ struct ksft_count { static struct ksft_count ksft_cnt; static unsigned int ksft_plan; +static bool ksft_debug_enabled; static inline unsigned int ksft_test_num(void) { @@ -175,6 +185,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) va_end(args); } +static inline void ksft_print_dbg_msg(const char *msg, ...) +{ + va_list args; + + if (!ksft_debug_enabled) + return; + + va_start(args, msg); + ksft_print_msg(msg, args); + va_end(args); +} + static inline void ksft_perror(const char *msg) { ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 8516e8434bc4..3f66e862e83e 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1091,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) { + while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) { switch (opt) { case 'f': case 'F': @@ -1104,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv) case 'l': test_harness_list_tests(); return KSFT_SKIP; + case 'd': + ksft_debug_enabled = true; + break; case 'h': default: fprintf(stderr, - "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n" + "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n" "\t-h print help\n" "\t-l list all tests\n" + "\t-d enable debug prints\n" "\n" "\t-t name include test\n" "\t-T name exclude test\n" @@ -1142,8 +1146,9 @@ static bool test_enabled(int argc, char **argv, int opt; optind = 1; - while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) { - has_positive |= islower(opt); + while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) { + if (opt != 'd') + has_positive |= islower(opt); switch (tolower(opt)) { case 't': diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile index 0617535a6ce4..d2369c01701a 100644 --- a/tools/testing/selftests/kselftest_harness/Makefile +++ b/tools/testing/selftests/kselftest_harness/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS_EXTENDED := harness-selftest TEST_PROGS := harness-selftest.sh +TEST_FILES := harness-selftest.expected EXTRA_CLEAN := harness-selftest.seen include ../lib.mk diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b16986aa6442..02fd1393947a 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -20,14 +20,12 @@ #include <sys/time.h> #include <unistd.h> +#include "../kselftest.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - #define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)" struct audit_filter { diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 88a3c78f5d98..9acecae36f51 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -22,10 +22,6 @@ #define TMP_DIR "tmp" -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - /* TEST_F_FORK() should not be used for new tests. */ #define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 530390033929..a448fae57831 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -228,7 +228,10 @@ $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ endif +# Extract the expected header directory +khdr_output := $(patsubst %/usr/include,%,$(filter %/usr/include,$(KHDR_INCLUDES))) + headers: - $(Q)$(MAKE) -C $(top_srcdir) headers + $(Q)$(MAKE) -f $(top_srcdir)/Makefile -C $(khdr_output) headers .PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index e7b23a8a05fe..c2a8586e51a1 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -58,3 +58,5 @@ pkey_sighandler_tests_32 pkey_sighandler_tests_64 guard-regions merge +prctl_thp_disable +rmap diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index d13b3cef2a2b..eaf9312097f7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -34,6 +34,7 @@ endif MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS += -Wunreachable-code LDLIBS = -lrt -lpthread -lm # Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is @@ -86,6 +87,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap TEST_GEN_FILES += process_madv +TEST_GEN_FILES += prctl_thp_disable TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -101,6 +103,7 @@ TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable TEST_GEN_FILES += guard-regions TEST_GEN_FILES += merge +TEST_GEN_FILES += rmap ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -228,6 +231,8 @@ $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma +$(OUTPUT)/rmap: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index c744c603d688..6560c26f47d1 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -41,11 +41,6 @@ static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -57,7 +52,7 @@ static int detect_thp_sizes(size_t sizes[], int max) if (!pmdsize) return 0; - orders = 1UL << sz2ord(pmdsize); + orders = 1UL << sz2ord(pmdsize, pagesize); orders |= thp_supported_orders(); for (i = 0; orders && count < max; i++) { @@ -1216,8 +1211,8 @@ static void run_anon_test_case(struct test_case const *test_case) size_t size = thpsizes[i]; struct thp_settings settings = *thp_current_settings(); - settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; thp_push_settings(&settings); if (size == pmdsize) { @@ -1868,7 +1863,7 @@ int main(int argc, char **argv) if (pmdsize) { /* Only if THP is supported. */ thp_read_settings(&default_settings); - default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT; thp_save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index bdeaac67ff9a..8900b840c17a 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -139,6 +139,8 @@ int main(int argc, char **argv) break; case 'n': nr_pages = atoi(optarg); + if (nr_pages < 0) + nr_pages = size / psize(); break; case 't': thp = 1; diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 141bf63cbe05..15aadaf24a66 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -2027,11 +2027,10 @@ TEST_F(hmm, hmm_cow_in_device) if (pid == -1) ASSERT_EQ(pid, 0); if (!pid) { - /* Child process waitd for SIGTERM from the parent. */ + /* Child process waits for SIGTERM from the parent. */ while (1) { } - perror("Should not reach this\n"); - exit(0); + /* Should not reach this */ } /* Parent process writes to COW pages(s) and gets a * new copy in system. In case of device private pages, diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index c463d1c09c9b..2bd1dac75c3f 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -65,10 +65,20 @@ static void register_region_with_uffd(char *addr, size_t len) struct uffdio_api uffdio_api; /* Create and enable userfaultfd object. */ - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) - ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno)); + if (uffd == -1) { + switch (errno) { + case EPERM: + ksft_exit_skip("Insufficient permissions, try running as root.\n"); + break; + case ENOSYS: + ksft_exit_skip("userfaultfd is not supported/not enabled.\n"); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno)); + break; + } + } uffdio_api.api = UFFD_API; uffdio_api.features = 0; diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index a18c50d51141..3fe7ef04ac62 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -394,7 +394,7 @@ static void *file_setup_area(int nr_hpages) perror("open()"); exit(EXIT_FAILURE); } - p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, + p = mmap(BASE_ADDR, size, PROT_READ, MAP_PRIVATE, finfo.fd, 0); if (p == MAP_FAILED || p != BASE_ADDR) { perror("mmap()"); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index d8bd1911dfc0..ac136f04b8d6 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -38,14 +38,11 @@ enum ksm_merge_mode { }; static int mem_fd; -static int ksm_fd; -static int ksm_full_scans_fd; -static int proc_self_ksm_stat_fd; -static int proc_self_ksm_merging_pages_fd; -static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; +static void init_global_file_handles(void); + static bool range_maps_duplicates(char *addr, unsigned long size) { unsigned long offs_a, offs_b, pfn_a, pfn_b; @@ -73,88 +70,6 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } -static long get_my_ksm_zero_pages(void) -{ - char buf[200]; - char *substr_ksm_zero; - size_t value_pos; - ssize_t read_size; - unsigned long my_ksm_zero_pages; - - if (!proc_self_ksm_stat_fd) - return 0; - - read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); - if (read_size < 0) - return -errno; - - buf[read_size] = 0; - - substr_ksm_zero = strstr(buf, "ksm_zero_pages"); - if (!substr_ksm_zero) - return 0; - - value_pos = strcspn(substr_ksm_zero, "0123456789"); - my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); - - return my_ksm_zero_pages; -} - -static long get_my_merging_pages(void) -{ - char buf[10]; - ssize_t ret; - - if (proc_self_ksm_merging_pages_fd < 0) - return proc_self_ksm_merging_pages_fd; - - ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static long ksm_get_full_scans(void) -{ - char buf[10]; - ssize_t ret; - - ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static int ksm_merge(void) -{ - long start_scans, end_scans; - - /* Wait for two full scans such that any possible merging happened. */ - start_scans = ksm_get_full_scans(); - if (start_scans < 0) - return start_scans; - if (write(ksm_fd, "1", 1) != 1) - return -errno; - do { - end_scans = ksm_get_full_scans(); - if (end_scans < 0) - return end_scans; - } while (end_scans < start_scans + 2); - - return 0; -} - -static int ksm_unmerge(void) -{ - if (write(ksm_fd, "2", 1) != 1) - return -errno; - return 0; -} - static char *__mmap_and_merge_range(char val, unsigned long size, int prot, enum ksm_merge_mode mode) { @@ -163,12 +78,12 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, int ret; /* Stabilize accounting by disabling KSM completely. */ - if (ksm_unmerge()) { + if (ksm_stop() < 0) { ksft_print_msg("Disabling (unmerging) KSM failed\n"); return err_map; } - if (get_my_merging_pages() > 0) { + if (ksm_get_self_merging_pages() > 0) { ksft_print_msg("Still pages merged\n"); return err_map; } @@ -218,7 +133,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, } /* Run KSM to trigger merging and wait. */ - if (ksm_merge()) { + if (ksm_start() < 0) { ksft_print_msg("Running KSM failed\n"); goto unmap; } @@ -227,7 +142,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, * Check if anything was merged at all. Ignore the zero page that is * accounted differently (depending on kernel support). */ - if (val && !get_my_merging_pages()) { + if (val && !ksm_get_self_merging_pages()) { ksft_print_msg("No pages got merged\n"); goto unmap; } @@ -274,6 +189,7 @@ static void test_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -286,15 +202,12 @@ static void test_unmerge_zero_pages(void) ksft_print_msg("[RUN] %s\n", __func__); - if (proc_self_ksm_stat_fd < 0) { - ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); - return; - } - if (ksm_use_zero_pages_fd < 0) { - ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + if (ksm_get_self_zero_pages() < 0) { + ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n"); return; } - if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + + if (ksm_use_zero_pages() < 0) { ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); return; } @@ -306,7 +219,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after KSM merging */ pages_expected = size / pagesize; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); goto unmap; } @@ -319,7 +232,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after unmerging */ pages_expected /= 2; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); goto unmap; } @@ -329,7 +242,7 @@ static void test_unmerge_zero_pages(void) *((unsigned int *)&map[offs]) = offs; /* Now we should have no zeropages remaining. */ - if (get_my_ksm_zero_pages()) { + if (ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); goto unmap; } @@ -338,6 +251,7 @@ static void test_unmerge_zero_pages(void) ksft_test_result(!range_maps_duplicates(map, size), "KSM zero pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -366,6 +280,7 @@ static void test_unmerge_discarded(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -452,6 +367,7 @@ static void test_unmerge_uffd_wp(void) close_uffd: close(uffd); unmap: + ksm_stop(); munmap(map, size); } #endif @@ -506,27 +422,30 @@ static int test_child_ksm(void) /* Test if KSM is enabled for the process. */ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1) - return -1; + return 1; /* Test if merge could really happen. */ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE); if (map == MAP_MERGE_FAIL) - return -2; + return 2; else if (map == MAP_MERGE_SKIP) - return -3; + return 3; + ksm_stop(); munmap(map, size); return 0; } static void test_child_ksm_err(int status) { - if (status == -1) + if (status == 1) ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); - else if (status == -2) + else if (status == 2) ksft_test_result_fail("Merge in child failed\n"); - else if (status == -3) + else if (status == 3) ksft_test_result_skip("Merge in child skipped\n"); + else if (status == 4) + ksft_test_result_fail("Binary not found\n"); } /* Verify that prctl ksm flag is inherited. */ @@ -548,6 +467,7 @@ static void test_prctl_fork(void) child_pid = fork(); if (!child_pid) { + init_global_file_handles(); exit(test_child_ksm()); } else if (child_pid < 0) { ksft_test_result_fail("fork() failed\n"); @@ -595,10 +515,10 @@ static void test_prctl_fork_exec(void) return; } else if (child_pid == 0) { char *prg_name = "./ksm_functional_tests"; - char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME }; + char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL }; execv(prg_name, argv_for_program); - return; + exit(4); } if (waitpid(child_pid, &status, 0) > 0) { @@ -644,6 +564,7 @@ static void test_prctl_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -677,6 +598,47 @@ static void test_prot_none(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); + munmap(map, size); +} + +static void test_fork_ksm_merging_page_count(void) +{ + const unsigned int size = 2 * MiB; + char *map; + pid_t child_pid; + int status; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); + if (map == MAP_FAILED) + return; + + child_pid = fork(); + if (!child_pid) { + init_global_file_handles(); + exit(ksm_get_self_merging_pages()); + } else if (child_pid < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unmap; + } + + if (waitpid(child_pid, &status, 0) < 0) { + ksft_test_result_fail("waitpid() failed\n"); + goto unmap; + } + + status = WEXITSTATUS(status); + if (status) { + ksft_test_result_fail("ksm_merging_page in child: %d\n", status); + goto unmap; + } + + ksft_test_result_pass("ksm_merging_pages is not inherited after fork\n"); + +unmap: + ksm_stop(); munmap(map, size); } @@ -685,24 +647,20 @@ static void init_global_file_handles(void) mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + if (ksm_stop() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n"); + if (ksm_get_full_scans() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_get_self_merging_pages() < 0) + ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n"); } int main(int argc, char **argv) { - unsigned int tests = 8; + unsigned int tests = 9; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { @@ -734,6 +692,7 @@ int main(int argc, char **argv) test_prctl_fork(); test_prctl_fork_exec(); test_prctl_unmerge(); + test_fork_ksm_merging_page_count(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index b6fabd5c27ed..d8d11bc67ddc 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -264,23 +264,6 @@ static void test_softdirty(void) munmap(addr, SIZE); } -static int system_has_softdirty(void) -{ - /* - * There is no way to check if the kernel supports soft-dirty, other - * than by writing to a page and seeing if the bit was set. But the - * tests are intended to check that the bit gets set when it should, so - * doing that check would turn a potentially legitimate fail into a - * skip. Fortunately, we know for sure that arm64 does not support - * soft-dirty. So for now, let's just use the arch as a corse guide. - */ -#if defined(__aarch64__) - return 0; -#else - return 1; -#endif -} - int main(int argc, char **argv) { int nr_tests = 16; @@ -288,7 +271,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - if (system_has_softdirty()) + if (softdirty_supported()) nr_tests += 5; ksft_print_header(); @@ -300,7 +283,7 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - if (system_has_softdirty()) + if (softdirty_supported()) test_softdirty(); err = ksft_get_fail_cnt(); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5bd52a951cbd..bf2863b102e3 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -846,7 +846,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -899,7 +899,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -948,7 +948,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index e6face7c0166..4fc8e578ec7c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -209,7 +209,7 @@ int userfaultfd_tests(void) wp_addr_range(mem, mem_size); vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); @@ -247,11 +247,11 @@ int sanity_tests_sd(void) vec_size = num_pages/2; mem_size = num_pages * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec2) ksft_exit_fail_msg("error nomem\n"); @@ -436,7 +436,7 @@ int sanity_tests_sd(void) mem_size = 1050 * page_size; vec_size = mem_size/(page_size*2); - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -491,7 +491,7 @@ int sanity_tests_sd(void) mem_size = 10000 * page_size; vec_size = 50; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -541,7 +541,7 @@ int sanity_tests_sd(void) vec_size = 1000; mem_size = vec_size * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -695,8 +695,8 @@ int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) } vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); /* 1. all new pages must be not be written (dirty) */ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, @@ -807,8 +807,8 @@ int hpage_unit_tests(void) unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec || !vec2) ksft_exit_fail_msg("malloc failed\n"); @@ -997,7 +997,7 @@ int unmapped_region_tests(void) void *start = (void *)0x10000000; int written, len = 0x00040000; long vec_size = len / page_size; - struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); + struct page_region *vec = calloc(vec_size, sizeof(struct page_region)); /* 1. Get written pages */ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0, @@ -1062,7 +1062,7 @@ int sanity_tests(void) mem_size = 10 * page_size; vec_size = mem_size / page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED || vec == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 866ac023baf5..88659f0a90ea 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * Basic VM_PFNMAP tests relying on mmap() of input file provided. + * Use '/dev/mem' as default. * * Copyright 2025, Red Hat, Inc. * @@ -25,6 +26,7 @@ #include "vm_util.h" static sigjmp_buf sigjmp_buf_env; +static char *file = "/dev/mem"; static void signal_handler(int sig) { @@ -51,7 +53,7 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) return ret; } -static int find_ram_target(off_t *phys_addr, +static int find_ram_target(off_t *offset, unsigned long long pagesize) { unsigned long long start, end; @@ -91,7 +93,7 @@ static int find_ram_target(off_t *phys_addr, /* We need two pages. */ if (end > start + 2 * pagesize) { fclose(file); - *phys_addr = start; + *offset = start; return 0; } } @@ -100,7 +102,7 @@ static int find_ram_target(off_t *phys_addr, FIXTURE(pfnmap) { - off_t phys_addr; + off_t offset; size_t pagesize; int dev_mem_fd; char *addr1; @@ -113,23 +115,31 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); - /* We'll require two physical pages throughout our tests ... */ - if (find_ram_target(&self->phys_addr, self->pagesize)) - SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) { + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->offset, self->pagesize)) + SKIP(return, + "Cannot find ram target in '/proc/iomem'\n"); + } else { + self->offset = 0; + } - self->dev_mem_fd = open("/dev/mem", O_RDONLY); + self->dev_mem_fd = open(file, O_RDONLY); if (self->dev_mem_fd < 0) - SKIP(return, "Cannot open '/dev/mem'\n"); + SKIP(return, "Cannot open '%s'\n", file); self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); if (self->addr1 == MAP_FAILED) - SKIP(return, "Cannot mmap '/dev/mem'\n"); + SKIP(return, "Cannot mmap '%s'\n", file); + + if (!check_vmflag_pfnmap(self->addr1)) + SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file); /* ... and want to be able to read from them. */ if (test_read_access(self->addr1, self->size1, self->pagesize)) - SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + SKIP(return, "Cannot read-access mmap'ed '%s'\n", file); self->size2 = 0; self->addr2 = MAP_FAILED; @@ -182,7 +192,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); ASSERT_NE(self->addr2, MAP_FAILED); } @@ -246,4 +256,14 @@ TEST_F(pfnmap, fork) ASSERT_EQ(ret, 0); } -TEST_HARNESS_MAIN +int main(int argc, char **argv) +{ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--") == 0) { + if (i + 1 < argc && strlen(argv[i + 1]) > 0) + file = argv[i + 1]; + return test_harness_run(i, argv); + } + } + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index ea404f80e6cb..fa15f006fa68 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -84,9 +84,6 @@ extern void abort_hooks(void); #ifndef noinline # define noinline __attribute__((noinline)) #endif -#ifndef __maybe_unused -# define __maybe_unused __attribute__((__unused__)) -#endif int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index b5e076a564c9..302fef54049c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -41,7 +41,7 @@ static siginfo_t siginfo = {0}; * syscall will attempt to access the PLT in order to call a library function * which is protected by MPK 0 which we don't have access to. */ -static inline __always_inline +static __always_inline long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) { unsigned long ret; diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c new file mode 100644 index 000000000000..84b4a4b345af --- /dev/null +++ b/tools/testing/selftests/mm/prctl_thp_disable.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic tests for PR_GET/SET_THP_DISABLE prctl calls + * + * Author(s): Usama Arif <usamaarif642@gmail.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/mman.h> +#include <sys/prctl.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "thp_settings.h" +#include "vm_util.h" + +#ifndef PR_THP_DISABLE_EXCEPT_ADVISED +#define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) +#endif + +enum thp_collapse_type { + THP_COLLAPSE_NONE, + THP_COLLAPSE_MADV_NOHUGEPAGE, + THP_COLLAPSE_MADV_HUGEPAGE, /* MADV_HUGEPAGE before access */ + THP_COLLAPSE_MADV_COLLAPSE, /* MADV_COLLAPSE after access */ +}; + +/* + * Function to mmap a buffer, fault it in, madvise it appropriately (before + * page fault for MADV_HUGE, and after for MADV_COLLAPSE), and check if the + * mmap region is huge. + * Returns: + * 0 if test doesn't give hugepage + * 1 if test gives a hugepage + * -errno if mmap fails + */ +static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize) +{ + char *mem, *mmap_mem; + size_t mmap_size; + int ret; + + /* For alignment purposes, we need twice the THP size. */ + mmap_size = 2 * pmdsize; + mmap_mem = (char *)mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + return -errno; + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + + if (madvise_buf == THP_COLLAPSE_MADV_HUGEPAGE) + madvise(mem, pmdsize, MADV_HUGEPAGE); + else if (madvise_buf == THP_COLLAPSE_MADV_NOHUGEPAGE) + madvise(mem, pmdsize, MADV_NOHUGEPAGE); + + /* Ensure memory is allocated */ + memset(mem, 1, pmdsize); + + if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE) + madvise(mem, pmdsize, MADV_COLLAPSE); + + /* HACK: make sure we have a separate VMA that we can check reliably. */ + mprotect(mem, pmdsize, PROT_READ); + + ret = check_huge_anon(mem, 1, pmdsize); + munmap(mmap_mem, mmap_size); + return ret; +} + +static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_completely) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_completely) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_completely) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL)) + SKIP(return, "Unable to disable THPs completely for the process\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_completely) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_completely, nofork) +{ + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_completely, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +static void prctl_thp_disable_except_madvise_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 3); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_except_madvise) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_except_madvise) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_except_madvise) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, NULL, NULL)) + SKIP(return, "Unable to set PR_THP_DISABLE_EXCEPT_ADVISED\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_except_madvise) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_except_madvise, nofork) +{ + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_except_madvise, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, + variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 23ebec367015..2085982dba69 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -557,13 +557,11 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, int nr_iterations = random() % 100; int ret; - while (0) { + while (nr_iterations-- >= 0) { int rpkey = alloc_random_pkey(); ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" " shadow: 0x%016llx\n", @@ -1304,7 +1302,7 @@ static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) static void test_ptrace_of_child(int *ptr, u16 pkey) { - __attribute__((__unused__)) int peek_result; + __always_unused int peek_result; pid_t child_pid; void *ignored = 0; long ret; diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c new file mode 100644 index 000000000000..13f7bccfd0a9 --- /dev/null +++ b/tools/testing/selftests/mm/rmap.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RMAP functional tests + * + * Author(s): Wei Yang <richard.weiyang@gmail.com> + */ + +#include "../kselftest_harness.h" +#include <strings.h> +#include <pthread.h> +#include <numa.h> +#include <numaif.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <signal.h> +#include <time.h> +#include <sys/sem.h> +#include <unistd.h> +#include <fcntl.h> + +#include "vm_util.h" + +#define TOTAL_LEVEL 5 +#define MAX_CHILDREN 3 + +#define FAIL_ON_CHECK (1 << 0) +#define FAIL_ON_WORK (1 << 1) + +struct sembuf sem_wait = {0, -1, 0}; +struct sembuf sem_signal = {0, 1, 0}; + +enum backend_type { + ANON, + SHM, + NORM_FILE, +}; + +#define PREFIX "kst_rmap" +#define MAX_FILENAME_LEN 256 +const char *suffixes[] = { + "", + "_shm", + "_file", +}; + +struct global_data; +typedef int (*work_fn)(struct global_data *data); +typedef int (*check_fn)(struct global_data *data); +typedef void (*prepare_fn)(struct global_data *data); + +struct global_data { + int worker_level; + + int semid; + int pipefd[2]; + + unsigned int mapsize; + unsigned int rand_seed; + char *region; + + prepare_fn do_prepare; + work_fn do_work; + check_fn do_check; + + enum backend_type backend; + char filename[MAX_FILENAME_LEN]; + + unsigned long *expected_pfn; +}; + +/* + * Create a process tree with TOTAL_LEVEL height and at most MAX_CHILDREN + * children for each. + * + * It will randomly select one process as 'worker' process which will + * 'do_work' until all processes are created. And all other processes will + * wait until 'worker' finish its work. + */ +void propagate_children(struct __test_metadata *_metadata, struct global_data *data) +{ + pid_t root_pid, pid; + unsigned int num_child; + int status; + int ret = 0; + int curr_child, worker_child; + int curr_level = 1; + bool is_worker = true; + + root_pid = getpid(); +repeat: + num_child = rand_r(&data->rand_seed) % MAX_CHILDREN + 1; + worker_child = is_worker ? rand_r(&data->rand_seed) % num_child : -1; + + for (curr_child = 0; curr_child < num_child; curr_child++) { + pid = fork(); + + if (pid < 0) { + perror("Error: fork\n"); + } else if (pid == 0) { + curr_level++; + + if (curr_child != worker_child) + is_worker = false; + + if (curr_level == TOTAL_LEVEL) + break; + + data->rand_seed += curr_child; + goto repeat; + } + } + + if (data->do_prepare) + data->do_prepare(data); + + close(data->pipefd[1]); + + if (is_worker && curr_level == data->worker_level) { + /* This is the worker process, first wait last process created */ + char buf; + + while (read(data->pipefd[0], &buf, 1) > 0) + ; + + if (data->do_work) + ret = data->do_work(data); + + /* Kick others */ + semctl(data->semid, 0, IPC_RMID); + } else { + /* Wait worker finish */ + semop(data->semid, &sem_wait, 1); + if (data->do_check) + ret = data->do_check(data); + } + + /* Wait all child to quit */ + while (wait(&status) > 0) { + if (WIFEXITED(status)) + ret |= WEXITSTATUS(status); + } + + if (getpid() == root_pid) { + if (ret & FAIL_ON_WORK) + SKIP(return, "Failed in worker"); + + ASSERT_EQ(ret, 0); + } else { + exit(ret); + } +} + +FIXTURE(migrate) +{ + struct global_data data; +}; + +FIXTURE_SETUP(migrate) +{ + struct global_data *data = &self->data; + + if (numa_available() < 0) + SKIP(return, "NUMA not available"); + if (numa_bitmask_weight(numa_all_nodes_ptr) <= 1) + SKIP(return, "Not enough NUMA nodes available"); + + data->mapsize = getpagesize(); + + data->expected_pfn = mmap(0, sizeof(unsigned long), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->expected_pfn, MAP_FAILED); + + /* Prepare semaphore */ + data->semid = semget(IPC_PRIVATE, 1, 0666 | IPC_CREAT); + ASSERT_NE(data->semid, -1); + ASSERT_NE(semctl(data->semid, 0, SETVAL, 0), -1); + + /* Prepare pipe */ + ASSERT_NE(pipe(data->pipefd), -1); + + data->rand_seed = time(NULL); + srand(data->rand_seed); + + data->worker_level = rand() % TOTAL_LEVEL + 1; + + data->do_prepare = NULL; + data->do_work = NULL; + data->do_check = NULL; + + data->backend = ANON; +}; + +FIXTURE_TEARDOWN(migrate) +{ + struct global_data *data = &self->data; + + if (data->region != MAP_FAILED) + munmap(data->region, data->mapsize); + data->region = MAP_FAILED; + if (data->expected_pfn != MAP_FAILED) + munmap(data->expected_pfn, sizeof(unsigned long)); + data->expected_pfn = MAP_FAILED; + semctl(data->semid, 0, IPC_RMID); + data->semid = -1; + + close(data->pipefd[0]); + + switch (data->backend) { + case ANON: + break; + case SHM: + shm_unlink(data->filename); + break; + case NORM_FILE: + unlink(data->filename); + break; + } +} + +void access_region(struct global_data *data) +{ + /* + * Force read "region" to make sure page fault in. + */ + FORCE_READ(*data->region); +} + +int try_to_move_page(char *region) +{ + int ret; + int node; + int status = 0; + int failures = 0; + + ret = move_pages(0, 1, (void **)®ion, NULL, &status, MPOL_MF_MOVE_ALL); + if (ret != 0) { + perror("Failed to get original numa"); + return FAIL_ON_WORK; + } + + /* Pick up a different target node */ + for (node = 0; node <= numa_max_node(); node++) { + if (numa_bitmask_isbitset(numa_all_nodes_ptr, node) && node != status) + break; + } + + if (node > numa_max_node()) { + ksft_print_msg("Couldn't find available numa node for testing\n"); + return FAIL_ON_WORK; + } + + while (1) { + ret = move_pages(0, 1, (void **)®ion, &node, &status, MPOL_MF_MOVE_ALL); + + /* migrate successfully */ + if (!ret) + break; + + /* error happened */ + if (ret < 0) { + ksft_perror("Failed to move pages"); + return FAIL_ON_WORK; + } + + /* migration is best effort; try again */ + if (++failures >= 100) + return FAIL_ON_WORK; + } + + return 0; +} + +int move_region(struct global_data *data) +{ + int ret; + int pagemap_fd; + + ret = try_to_move_page(data->region); + if (ret != 0) + return ret; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return 0; +} + +int has_same_pfn(struct global_data *data) +{ + unsigned long pfn; + int pagemap_fd; + + if (data->region == MAP_FAILED) + return 0; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_CHECK; + + pfn = pagemap_get_pfn(pagemap_fd, data->region); + if (pfn != *data->expected_pfn) + return FAIL_ON_CHECK; + + return 0; +} + +TEST_F(migrate, anon) +{ + struct global_data *data = &self->data; + + /* Map an area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, shm) +{ + int shm_fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[SHM]); + shm_fd = shm_open(data->filename, O_CREAT | O_RDWR, 0666); + ASSERT_NE(shm_fd, -1); + ftruncate(shm_fd, data->mapsize); + data->backend = SHM; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(shm_fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, file) +{ + int fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[NORM_FILE]); + fd = open(data->filename, O_CREAT | O_RDWR | O_EXCL, 0666); + ASSERT_NE(fd, -1); + ftruncate(fd, data->mapsize); + data->backend = NORM_FILE; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +void prepare_local_region(struct global_data *data) +{ + /* Allocate range and set the same data */ + data->region = mmap(NULL, data->mapsize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (data->region == MAP_FAILED) + return; + + memset(data->region, 0xcf, data->mapsize); +} + +int merge_and_migrate(struct global_data *data) +{ + int pagemap_fd; + int ret = 0; + + if (data->region == MAP_FAILED) + return FAIL_ON_WORK; + + if (ksm_start() < 0) + return FAIL_ON_WORK; + + ret = try_to_move_page(data->region); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return ret; +} + +TEST_F(migrate, ksm) +{ + int ret; + struct global_data *data = &self->data; + + if (ksm_stop() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/run\") failed"); + if (ksm_get_full_scans() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/full_scan\") failed"); + + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) + SKIP(return, "PR_SET_MEMORY_MERGE not supported"); + else if (ret) + ksft_exit_fail_perror("PR_SET_MEMORY_MERGE=1 failed"); + + data->do_prepare = prepare_local_region; + data->do_work = merge_and_migrate; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 471e539d82b8..d9173f2312b7 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -85,6 +85,8 @@ separated by spaces: test handling of page fragment allocation and freeing - vma_merge test VMA merge cases behave as expected +- rmap + test rmap behaves as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -136,7 +138,7 @@ run_gup_matrix() { # -n: How many pages to fetch together? 512 is special # because it's default thp size (or 2M on x86), 123 to # just test partial gup when hit a huge in whatever form - for num in "-n 1" "-n 512" "-n 123"; do + for num in "-n 1" "-n 512" "-n 123" "-n -1"; do CATEGORY="gup_test" run_test ./gup_test \ $huge $test_cmd $write $share $num done @@ -172,13 +174,13 @@ fi # set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + orig_nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) tries=2 while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do lackpgs=$((needpgs - freepgs)) echo 3 > /proc/sys/vm/drop_caches - if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then + if ! echo $((lackpgs + orig_nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then echo "Please run this test as root" exit $ksft_skip fi @@ -189,6 +191,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then done < /proc/meminfo tries=$((tries - 1)) done + nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ "$freepgs" -lt "$needpgs" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" @@ -311,9 +314,11 @@ if $RUN_ALL; then run_gup_matrix else # get_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -u + CATEGORY="gup_test" run_test ./gup_test -u -n 1 + CATEGORY="gup_test" run_test ./gup_test -u -n -1 # pin_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -a + CATEGORY="gup_test" run_test ./gup_test -a -n 1 + CATEGORY="gup_test" run_test ./gup_test -a -n -1 fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 @@ -322,11 +327,15 @@ CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 -# Hugetlb tests require source and destination huge pages. Pass in half -# the size of the free pages we have, which is used for *each*. +# Hugetlb tests require source and destination huge pages. Pass in almost half +# the size of the free pages we have, which is used for *each*. An adjustment +# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some +# extra hugepages - this is done to prevent the test from failing by racily +# reserving more hugepages than strictly required. # uffd-stress expects a region expressed in MiB, so we adjust # half_ufd_size_MB accordingly. -half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) +adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) )) +half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 @@ -532,6 +541,12 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh aligned CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned +CATEGORY="rmap" run_test ./rmap + +if [ "${HAVE_HUGEPAGES}" = 1 ]; then + echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages +fi + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8a3f2b4b2186..4ee4db3750c1 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -200,8 +200,11 @@ int main(int argc, char **argv) int pagesize; ksft_print_header(); - ksft_set_plan(15); + if (!softdirty_supported()) + ksft_exit_skip("soft-dirty is not support\n"); + + ksft_set_plan(15); pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); if (pagemap_fd < 0) ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 44a3f8a58806..743af3c05190 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -25,6 +25,8 @@ uint64_t pagesize; unsigned int pageshift; uint64_t pmd_pagesize; +unsigned int pmd_order; +int *expected_orders; #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" #define SMAP_PATH "/proc/self/smaps" @@ -34,28 +36,225 @@ uint64_t pmd_pagesize; #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" -#define PFN_MASK ((1UL<<55)-1) -#define KPF_THP (1UL<<22) +const char *pagemap_proc = "/proc/self/pagemap"; +const char *kpageflags_proc = "/proc/kpageflags"; +int pagemap_fd; +int kpageflags_fd; -int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd, + int kpageflags_fd) { - uint64_t paddr; - uint64_t page_flags; + const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD; + const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL; + const unsigned long nr_pages = 1UL << order; + unsigned long pfn_head; + uint64_t pfn_flags; + unsigned long pfn; + unsigned long i; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non present page */ + if (pfn == -1UL) + return false; + + if (pageflags_get(pfn, kpageflags_fd, &pfn_flags)) + goto fail; + + /* check for order-0 pages */ + if (!order) { + if (pfn_flags & (folio_head_flags | folio_tail_flags)) + return false; + return true; + } + + /* non THP folio */ + if (!(pfn_flags & KPF_THP)) + return false; + + pfn_head = pfn & ~(nr_pages - 1); + + if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags)) + goto fail; + + /* head PFN has no compound_head flag set */ + if ((pfn_flags & folio_head_flags) != folio_head_flags) + return false; + + /* check all tail PFN flags */ + for (i = 1; i < nr_pages; i++) { + if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags)) + goto fail; + if ((pfn_flags & folio_tail_flags) != folio_tail_flags) + return false; + } + + /* + * check the PFN after this folio, but if its flags cannot be obtained, + * assume this folio has the expected order + */ + if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags)) + return true; + + /* If we find another tail page, then the folio is larger. */ + return (pfn_flags & folio_tail_flags) != folio_tail_flags; +fail: + ksft_exit_fail_msg("Failed to get folio info\n"); + return false; +} + +static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd, + uint64_t *flags) +{ + unsigned long pfn; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non-present PFN */ + if (pfn == -1UL) + return 1; + + if (pageflags_get(pfn, kpageflags_fd, flags)) + return -1; + + return 0; +} + +/* + * gather_after_split_folio_orders - scan through [vaddr_start, len) and record + * folio orders + * + * @vaddr_start: start vaddr + * @len: range length + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap + * @kpageflags_fd: file descriptor to /proc/kpageflags + * @orders: output folio order array + * @nr_orders: folio order array size + * + * gather_after_split_folio_orders() scan through [vaddr_start, len) and check + * all folios within the range and record their orders. All order-0 pages will + * be recorded. Non-present vaddr is skipped. + * + * NOTE: the function is used to check folio orders after a split is performed, + * so it assumes [vaddr_start, len) fully maps to after-split folios within that + * range. + * + * Return: 0 - no error, -1 - unhandled cases + */ +static int gather_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + uint64_t page_flags = 0; + int cur_order = -1; + char *vaddr; + + if (pagemap_fd == -1 || kpageflags_fd == -1) + return -1; + if (!orders) + return -1; + if (nr_orders <= 0) + return -1; + + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { + char *next_folio_vaddr; + int status; + + status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd, + &page_flags); + if (status < 0) + return -1; + + /* skip non present vaddr */ + if (status == 1) { + vaddr += psize(); + continue; + } - if (pagemap_file) { - pread(pagemap_file, &paddr, sizeof(paddr), - ((long)vaddr >> pageshift) * sizeof(paddr)); + /* all order-0 pages with possible false postive (non folio) */ + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + orders[0]++; + vaddr += psize(); + continue; + } - if (kpageflags_file) { - pread(kpageflags_file, &page_flags, sizeof(page_flags), - (paddr & PFN_MASK) * sizeof(page_flags)); + /* skip non thp compound pages */ + if (!(page_flags & KPF_THP)) { + vaddr += psize(); + continue; + } - return !!(page_flags & KPF_THP); + /* vpn points to part of a THP at this point */ + if (page_flags & KPF_COMPOUND_HEAD) + cur_order = 1; + else { + vaddr += psize(); + continue; } + + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + + if (next_folio_vaddr >= vaddr_start + len) + break; + + while ((status = vaddr_pageflags_get(next_folio_vaddr, + pagemap_fd, kpageflags_fd, + &page_flags)) >= 0) { + /* + * non present vaddr, next compound head page, or + * order-0 page + */ + if (status == 1 || + (page_flags & KPF_COMPOUND_HEAD) || + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + if (cur_order < nr_orders) { + orders[cur_order]++; + cur_order = -1; + vaddr = next_folio_vaddr; + } + break; + } + + cur_order++; + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + } + + if (status < 0) + return status; } + if (cur_order > 0 && cur_order < nr_orders) + orders[cur_order]++; return 0; } +static int check_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + int *vaddr_orders; + int status; + int i; + + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); + + if (!vaddr_orders) + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); + + memset(vaddr_orders, 0, sizeof(int) * nr_orders); + status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd, + kpageflags_fd, vaddr_orders, nr_orders); + if (status) + ksft_exit_fail_msg("gather folio info failed\n"); + + for (i = 0; i < nr_orders; i++) + if (vaddr_orders[i] != orders[i]) { + ksft_print_msg("order %d: expected: %d got %d\n", i, + orders[i], vaddr_orders[i]); + status = -1; + } + + free(vaddr_orders); + return status; +} + static void write_file(const char *path, const char *buf, size_t buflen) { int fd; @@ -111,7 +310,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp unsigned long rss_anon_before, rss_anon_after; size_t i; - if (!check_huge_anon(one_page, 4, pmd_pagesize)) + if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize)) ksft_exit_fail_msg("No THP is allocated\n"); rss_anon_before = rss_anon(); @@ -135,7 +334,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp rss_anon_before, rss_anon_after); } -void split_pmd_zero_pages(void) +static void split_pmd_zero_pages(void) { char *one_page; int nr_hpages = 4; @@ -147,7 +346,7 @@ void split_pmd_zero_pages(void) free(one_page); } -void split_pmd_thp_to_order(int order) +static void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -173,6 +372,13 @@ void split_pmd_thp_to_order(int order) if (one_page[i] != (char)i) ksft_exit_fail_msg("%ld byte corrupted\n", i); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + expected_orders[order] = 4 << (pmd_order - order); + + if (check_after_split_folio_orders(one_page, len, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) + ksft_exit_fail_msg("Unexpected THP split\n"); if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); @@ -181,90 +387,97 @@ void split_pmd_thp_to_order(int order) free(one_page); } -void split_pte_mapped_thp(void) +static void split_pte_mapped_thp(void) { - char *one_page, *pte_mapped, *pte_mapped2; - size_t len = 4 * pmd_pagesize; - uint64_t thp_size; + const size_t nr_thps = 4; + const size_t thp_area_size = nr_thps * pmd_pagesize; + const size_t page_area_size = nr_thps * pagesize; + char *thp_area, *tmp, *page_area = MAP_FAILED; size_t i; - const char *pagemap_template = "/proc/%d/pagemap"; - const char *kpageflags_proc = "/proc/kpageflags"; - char pagemap_proc[255]; - int pagemap_fd; - int kpageflags_fd; - if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) - ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); + thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (thp_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + return; + } - pagemap_fd = open(pagemap_proc, O_RDONLY); - if (pagemap_fd == -1) - ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + madvise(thp_area, thp_area_size, MADV_HUGEPAGE); - kpageflags_fd = open(kpageflags_proc, O_RDONLY); - if (kpageflags_fd == -1) - ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + for (i = 0; i < thp_area_size; i++) + thp_area[i] = (char)i; - one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (one_page == MAP_FAILED) - ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) { + ksft_test_result_skip("Not all THPs allocated\n"); + goto out; + } - madvise(one_page, len, MADV_HUGEPAGE); + /* + * To challenge spitting code, we will mremap a single page of each + * THP (page[i] of thp[i]) in the thp_area into page_area. This will + * replace the PMD mappings in the thp_area by PTE mappings first, + * but leaving the THP unsplit, to then create a page-sized hole in + * the thp_area. + * We will then manually trigger splitting of all THPs through the + * single mremap'ed pages of each THP in the page_area. + */ + page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (page_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + goto out; + } - for (i = 0; i < len; i++) - one_page[i] = (char)i; + for (i = 0; i < nr_thps; i++) { + tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED, + page_area + pagesize * i); + if (tmp != MAP_FAILED) + continue; + ksft_test_result_fail("mremap failed: %s\n", strerror(errno)); + goto out; + } - if (!check_huge_anon(one_page, 4, pmd_pagesize)) - ksft_exit_fail_msg("No THP is allocated\n"); + /* + * Verify that our THPs were not split yet. Note that + * check_huge_anon() cannot be used as it checks for PMD mappings. + */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, pmd_order, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu missing after mremap\n", i); + goto out; + } - /* remap the first pagesize of first THP */ - pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); - - /* remap the Nth pagesize of Nth THP */ - for (i = 1; i < 4; i++) { - pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, - pagesize, pagesize, - MREMAP_MAYMOVE|MREMAP_FIXED, - pte_mapped + pagesize * i); - if (pte_mapped2 == MAP_FAILED) - ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); - } - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; - - if (thp_size != 4) - ksft_exit_fail_msg("Some THPs are missing during mremap\n"); - - /* split all remapped THPs */ - write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, - (uint64_t)pte_mapped + pagesize * 4, 0); - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) { - if (pte_mapped[i] != (char)i) - ksft_exit_fail_msg("%ld byte corrupted\n", i); + /* Split all THPs through the remapped pages. */ + write_debugfs(PID_FMT, getpid(), (uint64_t)page_area, + (uint64_t)page_area + page_area_size, 0); - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; + /* Corruption during mremap or split? */ + for (i = 0; i < page_area_size; i++) { + if (page_area[i] == (char)i) + continue; + ksft_test_result_fail("%zu byte corrupted\n", i); + goto out; } - if (thp_size) - ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); + /* Split failed? */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, 0, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu not split\n", i); + } ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); - munmap(one_page, len); - close(pagemap_fd); - close(kpageflags_fd); +out: + munmap(thp_area, thp_area_size); + if (page_area != MAP_FAILED) + munmap(page_area, page_area_size); } -void split_file_backed_thp(int order) +static void split_file_backed_thp(int order) { int status; int fd; @@ -297,7 +510,7 @@ void split_file_backed_thp(int order) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { - ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + ksft_print_msg("Fail to create file-backed THP split testing file\n"); goto cleanup; } @@ -366,7 +579,7 @@ out: ksft_exit_fail_msg("Error occurred\n"); } -bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, +static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, const char **thp_fs_loc) { if (xfs_path) { @@ -382,7 +595,7 @@ bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, return true; } -void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) +static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) { int status; @@ -395,8 +608,8 @@ void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) strerror(errno)); } -int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, - char **addr) +static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, + int *fd, char **addr) { size_t i; unsigned char buf[1024]; @@ -462,10 +675,11 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, - int order, int offset) +static void split_thp_in_pagecache_to_order_at(size_t fd_size, + const char *fs_loc, int order, int offset) { int fd; + char *split_addr; char *addr; size_t i; char testfile[INPUT_MAX]; @@ -479,14 +693,33 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); if (err) return; + err = 0; - if (offset == -1) - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order); - else - write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order, offset); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + /* + * use [split_addr, split_addr + pagesize) range to split THPs, since + * the debugfs function always split a range with pagesize step and + * providing a full [addr, addr + fd_size) range can trigger multiple + * splits, complicating after-split result checking. + */ + if (offset == -1) { + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order); + + expected_orders[order] = fd_size / (pagesize << order); + } else { + int times = fd_size / pmd_pagesize; + + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order, offset); + + for (i = order + 1; i < pmd_order; i++) + expected_orders[i] = times; + expected_orders[order] = 2 * times; + } for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -495,6 +728,14 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, goto out; } + if (check_after_split_folio_orders(addr, fd_size, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) { + ksft_print_msg("Unexpected THP split\n"); + err = 1; + goto out; + } + if (!check_huge_file(addr, 0, pmd_pagesize)) { ksft_print_msg("Still FilePmdMapped not split\n"); err = EXIT_FAILURE; @@ -525,6 +766,8 @@ int main(int argc, char **argv) const char *fs_loc; bool created_tmp; int offset; + unsigned int nr_pages; + unsigned int tests; ksft_print_header(); @@ -536,38 +779,58 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+1+9+9+8*4+2); - pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; pmd_pagesize = read_pmd_pagesize(); if (!pmd_pagesize) ksft_exit_fail_msg("Reading PMD pagesize failed\n"); + nr_pages = pmd_pagesize / pagesize; + pmd_order = sz2ord(pmd_pagesize, pagesize); + + expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1)); + if (!expected_orders) + ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + + tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2; + ksft_set_plan(tests); + + pagemap_fd = open(pagemap_proc, O_RDONLY); + if (pagemap_fd == -1) + ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + if (kpageflags_fd == -1) + ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + fd_size = 2 * pmd_pagesize; split_pmd_zero_pages(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) if (i != 1) split_pmd_thp_to_order(i); split_pte_mapped_thp(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); - for (i = 8; i >= 0; i--) + for (i = pmd_order - 1; i >= 0; i--) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) for (offset = 0; - offset < pmd_pagesize / pagesize; - offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + offset < nr_pages; + offset += MAX(nr_pages / 4, 1 << i)) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); + close(pagemap_fd); + close(kpageflags_fd); + free(expected_orders); + ksft_finished(); return 0; diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index d73b846736f1..d39096723fca 100755 --- a/tools/testing/selftests/mm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh @@ -47,14 +47,14 @@ check_test_requirements() fi } -run_perfformance_check() +run_performance_check() { echo "Run performance tests to evaluate how fast vmalloc allocation is." echo "It runs all test cases on one single CPU with sequential order." modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1 echo "Done." - echo "Ccheck the kernel message buffer to see the summary." + echo "Check the kernel message buffer to see the summary." } run_stability_check() @@ -160,7 +160,7 @@ function run_test() usage else if [[ "$1" = "performance" ]]; then - run_perfformance_check + run_performance_check elif [[ "$1" = "stress" ]]; then run_stability_check elif [[ "$1" = "smoke" ]]; then diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index bad60ac52874..574bd0f8ae48 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -382,10 +382,17 @@ unsigned long thp_shmem_supported_orders(void) return __thp_supported_orders(true); } -bool thp_is_enabled(void) +bool thp_available(void) { if (access(THP_SYSFS, F_OK) != 0) return false; + return true; +} + +bool thp_is_enabled(void) +{ + if (!thp_available()) + return false; int mode = thp_read_string("enabled", thp_enabled_strings); diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 6c07f70beee9..76eeb712e5f1 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,6 +84,7 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_available(void); bool thp_is_enabled(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 8e2b08dc5762..4f5e290ff1a6 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -177,13 +177,16 @@ void find_pagesizes(void) globfree(&g); read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); - if (shmmax_val < NUM_PAGES * largest) - ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", - largest * NUM_PAGES); + if (shmmax_val < NUM_PAGES * largest) { + ksft_print_msg("WARNING: shmmax is too small to run this test.\n"); + ksft_print_msg("Please run the following command to increase shmmax:\n"); + ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES); + ksft_exit_skip("Test skipped due to insufficient shmmax value.\n"); + } #if defined(__x86_64__) if (largest != 1U<<30) { - ksft_exit_fail_msg("No GB pages available on x86-64\n" + ksft_exit_skip("No GB pages available on x86-64\n" "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); } #endif diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index a37088a23ffe..994fe8c03923 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -7,18 +7,29 @@ #include "uffd-common.h" -#define BASE_PMD_ADDR ((void *)(1UL << 30)) - -volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -int uffd = -1, uffd_flags, finished, *pipefd, test_type; -bool map_shared; -bool test_uffdio_wp = true; -unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -atomic_bool ready_for_fork; + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* pthread_mutex_t starts at page offset 0 */ +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) +{ + return (pthread_mutex_t *) (area + nr * gopts->page_size); +} + +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +volatile unsigned long long *area_count(char *area, unsigned long nr, + uffd_global_test_opts_t *gopts) +{ + return (volatile unsigned long long *) + ((unsigned long)(area + nr * gopts->page_size + + sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & + ~(unsigned long)(sizeof(unsigned long long) - 1)); +} static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -40,15 +51,15 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) return mem_fd; } -static void anon_release_pages(char *rel_area) +static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } -static int anon_allocate_area(void **alloc_area, bool is_src) +static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (*alloc_area == MAP_FAILED) { *alloc_area = NULL; @@ -57,31 +68,32 @@ static int anon_allocate_area(void **alloc_area, bool is_src) return 0; } -static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { } -static void hugetlb_release_pages(char *rel_area) +static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (!map_shared) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (!gopts->map_shared) { + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } else { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } } -static int hugetlb_allocate_area(void **alloc_area, bool is_src) +static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - off_t size = nr_pages * page_size; + off_t size = gopts->nr_pages * gopts->page_size; off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; int mem_fd = uffd_mem_fd_create(size * 2, true); *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | (is_src ? 0 : MAP_NORESERVE), mem_fd, offset); if (*alloc_area == MAP_FAILED) { @@ -89,7 +101,7 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return -errno; } - if (map_shared) { + if (gopts->map_shared) { area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) @@ -97,9 +109,9 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) } if (is_src) { - alloc_area_alias = &area_src_alias; + alloc_area_alias = &gopts->area_src_alias; } else { - alloc_area_alias = &area_dst_alias; + alloc_area_alias = &gopts->area_dst_alias; } if (area_alias) *alloc_area_alias = area_alias; @@ -108,24 +120,25 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return 0; } -static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - if (!map_shared) + if (!gopts->map_shared) return; - *start = (unsigned long) area_dst_alias + offset; + *start = (unsigned long) gopts->area_dst_alias + offset; } -static void shmem_release_pages(char *rel_area) +static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } -static int shmem_allocate_area(void **alloc_area, bool is_src) +static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { void *area_alias = NULL; - size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); + size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); unsigned long offset = is_src ? 0 : bytes; char *p = NULL, *p_alias = NULL; int mem_fd = uffd_mem_fd_create(bytes * 2, false); @@ -159,22 +172,23 @@ static int shmem_allocate_area(void **alloc_area, bool is_src) err("mmap of anonymous memory failed at %p", p_alias); if (is_src) - area_src_alias = area_alias; + gopts->area_src_alias = area_alias; else - area_dst_alias = area_alias; + gopts->area_dst_alias = area_alias; close(mem_fd); return 0; } -static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - *start = (unsigned long)area_dst_alias + offset; + *start = (unsigned long)gopts->area_dst_alias + offset; } -static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) { - if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, + if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, read_pmd_pagesize())) err("Did not find expected %d number of hugepages", expect_nr_hpages); @@ -234,18 +248,18 @@ void uffd_stats_report(struct uffd_args *args, int n_cpus) printf("\n"); } -int userfaultfd_open(uint64_t *features) +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) { struct uffdio_api uffdio_api; - uffd = uffd_open(UFFD_FLAGS); - if (uffd < 0) + gopts->uffd = uffd_open(UFFD_FLAGS); + if (gopts->uffd < 0) return -1; - uffd_flags = fcntl(uffd, F_GETFD, NULL); + gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) /* Probably lack of CAP_PTRACE? */ return -1; if (uffdio_api.api != UFFD_API) @@ -255,59 +269,63 @@ int userfaultfd_open(uint64_t *features) return 0; } -static inline void munmap_area(void **area) +static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) { if (*area) - if (munmap(*area, nr_pages * page_size)) + if (munmap(*area, gopts->nr_pages * gopts->page_size)) err("munmap"); *area = NULL; } -void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) { size_t i; - if (pipefd) { - for (i = 0; i < nr_parallel * 2; ++i) { - if (close(pipefd[i])) + if (gopts->pipefd) { + for (i = 0; i < gopts->nr_parallel * 2; ++i) { + if (close(gopts->pipefd[i])) err("close pipefd"); } - free(pipefd); - pipefd = NULL; + free(gopts->pipefd); + gopts->pipefd = NULL; } - if (count_verify) { - free(count_verify); - count_verify = NULL; + if (gopts->count_verify) { + free(gopts->count_verify); + gopts->count_verify = NULL; } - if (uffd != -1) { - if (close(uffd)) + if (gopts->uffd != -1) { + if (close(gopts->uffd)) err("close uffd"); - uffd = -1; + gopts->uffd = -1; } - munmap_area((void **)&area_src); - munmap_area((void **)&area_src_alias); - munmap_area((void **)&area_dst); - munmap_area((void **)&area_dst_alias); - munmap_area((void **)&area_remap); + munmap_area(gopts, (void **)&gopts->area_src); + munmap_area(gopts, (void **)&gopts->area_src_alias); + munmap_area(gopts, (void **)&gopts->area_dst); + munmap_area(gopts, (void **)&gopts->area_dst_alias); + munmap_area(gopts, (void **)&gopts->area_remap); } -int uffd_test_ctx_init(uint64_t features, const char **errmsg) +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) { unsigned long nr, cpu; int ret; + gopts->area_src_alias = NULL; + gopts->area_dst_alias = NULL; + gopts->area_remap = NULL; + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { - ret = uffd_test_case_ops->pre_alloc(errmsg); + ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); if (ret) return ret; } - ret = uffd_test_ops->allocate_area((void **)&area_src, true); - ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); + ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); + ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); if (ret) { if (errmsg) *errmsg = "memory allocation failed"; @@ -315,26 +333,26 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) } if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { - ret = uffd_test_case_ops->post_alloc(errmsg); + ret = uffd_test_case_ops->post_alloc(gopts, errmsg); if (ret) return ret; } - ret = userfaultfd_open(&features); + ret = userfaultfd_open(gopts, &features); if (ret) { if (errmsg) *errmsg = "possible lack of privilege"; return ret; } - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) + gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); + if (!gopts->count_verify) err("count_verify"); - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = + for (nr = 0; nr < gopts->nr_pages; nr++) { + *area_mutex(gopts->area_src, nr, gopts) = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; + gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; /* * In the transition between 255 to 256, powerpc will * read out of order in my_bcmp and see both bytes as @@ -342,7 +360,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * after the count, to avoid my_bcmp to trigger false * positives. */ - *(area_count(area_src, nr) + 1) = 1; + *(area_count(gopts->area_src, nr, gopts) + 1) = 1; } /* @@ -363,13 +381,13 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * proactively split the thp and drop any accidentally initialized * pages within area_dst. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - pipefd = malloc(sizeof(int) * nr_parallel * 2); - if (!pipefd) + gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); + if (!gopts->pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_parallel; cpu++) - if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); return 0; @@ -416,9 +434,9 @@ static void continue_range(int ufd, __u64 start, __u64 len, bool wp) ret, (int64_t) req.mapped); } -int uffd_read_msg(int ufd, struct uffd_msg *msg) +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) { - int ret = read(uffd, msg, sizeof(*msg)); + int ret = read(gopts->uffd, msg, sizeof(*msg)); if (ret != sizeof(*msg)) { if (ret < 0) { @@ -433,7 +451,8 @@ int uffd_read_msg(int ufd, struct uffd_msg *msg) return 0; } -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -442,7 +461,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { /* Write protect page faults */ - wp_range(uffd, msg->arg.pagefault.address, page_size, false); + wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); args->wp_faults++; } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { uint8_t *area; @@ -460,12 +479,12 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) * (UFFD-registered). */ - area = (uint8_t *)(area_dst + - ((char *)msg->arg.pagefault.address - - area_dst_alias)); - for (b = 0; b < page_size; ++b) + area = (uint8_t *)(gopts->area_dst + + ((char *)msg->arg.pagefault.address - + gopts->area_dst_alias)); + for (b = 0; b < gopts->page_size; ++b) area[b] = ~area[b]; - continue_range(uffd, msg->arg.pagefault.address, page_size, + continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, args->apply_wp); args->minor_faults++; } else { @@ -493,10 +512,10 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); - if (copy_page(uffd, offset, args->apply_wp)) + if (copy_page(gopts, offset, args->apply_wp)) args->missing_faults++; } } @@ -504,6 +523,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) void *uffd_poll_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; unsigned long cpu = args->cpu; struct pollfd pollfd[2]; struct uffd_msg msg; @@ -514,12 +534,12 @@ void *uffd_poll_thread(void *arg) if (!args->handle_fault) args->handle_fault = uffd_handle_page_fault; - pollfd[0].fd = uffd; + pollfd[0].fd = gopts->uffd; pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].fd = gopts->pipefd[cpu*2]; pollfd[1].events = POLLIN; - ready_for_fork = true; + gopts->ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); @@ -537,30 +557,30 @@ void *uffd_poll_thread(void *arg) } if (!(pollfd[0].revents & POLLIN)) err("pollfd[0].revents %d", pollfd[0].revents); - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; switch (msg.event) { default: err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - args->handle_fault(&msg, args); + args->handle_fault(gopts, &msg, args); break; case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; + close(gopts->uffd); + gopts->uffd = msg.arg.fork.ufd; + pollfd[0].fd = gopts->uffd; break; case UFFD_EVENT_REMOVE: uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) err("remove failure"); break; case UFFD_EVENT_REMAP: - area_remap = area_dst; /* save for later unmap */ - area_dst = (char *)(unsigned long)msg.arg.remap.to; + gopts->area_remap = gopts->area_dst; /* save for later unmap */ + gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } } @@ -568,17 +588,18 @@ void *uffd_poll_thread(void *arg) return NULL; } -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, +static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, unsigned long offset) { - uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffd_test_ops->alias_mapping(gopts, + &uffdio_copy->dst, uffdio_copy->len, offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) err("UFFDIO_COPY retry error: %"PRId64, - (int64_t)uffdio_copy->copy); + (int64_t)uffdio_copy->copy); } else { err("UFFDIO_COPY retry unexpected: %"PRId64, (int64_t)uffdio_copy->copy); @@ -597,60 +618,60 @@ static void wake_range(int ufd, unsigned long addr, unsigned long len) addr), exit(1); } -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) + if (offset >= gopts->nr_pages * gopts->page_size) err("unexpected offset %lu\n", offset); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; + uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; + uffdio_copy.src = (unsigned long) gopts->area_src + offset; + uffdio_copy.len = gopts->page_size; if (wp) uffdio_copy.mode = UFFDIO_COPY_MODE_WP; else uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); - wake_range(ufd, uffdio_copy.dst, page_size); - } else if (uffdio_copy.copy != page_size) { + wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); + } else if (uffdio_copy.copy != gopts->page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); + if (gopts->test_uffdio_copy_eexist && retry) { + gopts->test_uffdio_copy_eexist = false; + retry_copy_page(gopts, &uffdio_copy, offset); } return 1; } return 0; } -int copy_page(int ufd, unsigned long offset, bool wp) +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) { - return __copy_page(ufd, offset, false, wp); + return __copy_page(gopts, offset, false, wp); } -int move_page(int ufd, unsigned long offset, unsigned long len) +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) { struct uffdio_move uffdio_move; - if (offset + len > nr_pages * page_size) + if (offset + len > gopts->nr_pages * gopts->page_size) err("unexpected offset %lu and length %lu\n", offset, len); - uffdio_move.dst = (unsigned long) area_dst + offset; - uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.dst = (unsigned long) gopts->area_dst + offset; + uffdio_move.src = (unsigned long) gopts->area_src + offset; uffdio_move.len = len; uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; uffdio_move.move = 0; - if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { /* real retval in uffdio_move.move */ if (uffdio_move.move != -EEXIST) err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); - wake_range(ufd, uffdio_move.dst, len); + wake_range(gopts->uffd, uffdio_move.dst, len); } else if (uffdio_move.move != len) { err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); } else diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 7700cbfa3975..37d3ca55905f 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -56,20 +56,17 @@ #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. - */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) +struct uffd_global_test_opts { + unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; + char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; + int uffd, uffd_flags, finished, *pipefd, test_type; + bool map_shared; + bool test_uffdio_wp; + unsigned long long *count_verify; + volatile bool test_uffdio_copy_eexist; + atomic_bool ready_for_fork; +}; +typedef struct uffd_global_test_opts uffd_global_test_opts_t; /* Userfaultfd test statistics */ struct uffd_args { @@ -79,50 +76,55 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + struct uffd_global_test_opts *gopts; /* A custom fault handler; defaults to uffd_handle_page_fault. */ - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, + struct uffd_args *args); }; struct uffd_test_ops { - int (*allocate_area)(void **alloc_area, bool is_src); - void (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); - void (*check_pmd_mapping)(void *p, int expect_nr_hpages); + int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src); + void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area); + void (*alias_mapping)(uffd_global_test_opts_t *gopts, + __u64 *start, + size_t len, + unsigned long offset); + void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages); }; typedef struct uffd_test_ops uffd_test_ops_t; struct uffd_test_case_ops { - int (*pre_alloc)(const char **errmsg); - int (*post_alloc)(const char **errmsg); + int (*pre_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); + int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -extern int uffd, uffd_flags, finished, *pipefd, test_type; -extern bool map_shared; -extern bool test_uffdio_wp; -extern unsigned long long *count_verify; -extern volatile bool test_uffdio_copy_eexist; -extern atomic_bool ready_for_fork; - +extern uffd_global_test_opts_t *uffd_gtest_opts; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; extern uffd_test_case_ops_t *uffd_test_case_ops; +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts); +volatile unsigned long long *area_count(char *area, + unsigned long nr, + uffd_global_test_opts_t *gopts); + void uffd_stats_report(struct uffd_args *args, int n_cpus); -int uffd_test_ctx_init(uint64_t features, const char **errmsg); -void uffd_test_ctx_clear(void); -int userfaultfd_open(uint64_t *features); -int uffd_read_msg(int ufd, struct uffd_msg *msg); +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts); +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features); +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); -int copy_page(int ufd, unsigned long offset, bool wp); -int move_page(int ufd, unsigned long offset, unsigned long len); +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args); +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp); +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp); +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 40af7f67c407..b51c89e1cd1a 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -44,6 +44,12 @@ uint64_t features; #define BOUNCE_VERIFY (1<<2) #define BOUNCE_POLL (1<<3) static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 @@ -51,7 +57,7 @@ static char *zeropage; pthread_attr_t attr; #define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" @@ -76,54 +82,58 @@ static void usage(void) exit(1); } -static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) { int i; for (i = 0; i < n_cpus; i++) { args[i].cpu = i; - args[i].apply_wp = test_uffdio_wp; + args[i].apply_wp = gopts->test_uffdio_wp; args[i].missing_faults = 0; args[i].wp_faults = 0; args[i].minor_faults = 0; + args[i].gopts = gopts; } } static void *locking_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; + page_nr += cpu * gopts->nr_pages_per_cpu; } - while (!finished) { + while (!gopts->finished) { if (bounces & BOUNCE_RANDOM) { if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) err("getrandom failed"); } else page_nr += 1; - page_nr %= nr_pages; - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) err("page_nr %lu memory corruption %llu %llu", - page_nr, count, count_verify[page_nr]); + page_nr, count, gopts->count_verify[page_nr]); count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); } return NULL; } -static int copy_page_retry(int ufd, unsigned long offset) +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) { - return __copy_page(ufd, offset, true, test_uffdio_wp); + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); } pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -131,15 +141,16 @@ pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; static void *uffd_read_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; struct uffd_msg msg; pthread_mutex_unlock(&uffd_read_mutex); /* from here cancellation is ok */ for (;;) { - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; - uffd_handle_page_fault(&msg, args); + uffd_handle_page_fault(gopts, &msg, args); } return NULL; @@ -147,32 +158,34 @@ static void *uffd_read_thread(void *arg) static void *background_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr, start_nr, mid_nr, end_nr; - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; mid_nr = (start_nr + end_nr) / 2; /* Copy the first half of the pages */ for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); /* * If we need to test uffd-wp, set it up now. Then we'll have * at least the first half of the pages mapped already which * can be write-protected for testing */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); /* * Continue the 2nd half of the page copying, handling write * protection faults if any */ for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); return NULL; } @@ -180,17 +193,21 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_parallel]; - pthread_t uffd_threads[nr_parallel]; - pthread_t background_threads[nr_parallel]; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; - finished = 0; - for (cpu = 0; cpu < nr_parallel; cpu++) { + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) + locking_thread, (void *)&args[cpu])) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, @@ -200,10 +217,10 @@ static int stress(struct uffd_args *args) pthread_mutex_lock(&uffd_read_mutex); } if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) + background_thread, (void *)&args[cpu])) return 1; } - for (cpu = 0; cpu < nr_parallel; cpu++) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -216,17 +233,17 @@ static int stress(struct uffd_args *args) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - uffd_test_ops->release_pages(area_src); + uffd_test_ops->release_pages(gopts, gopts->area_src); - finished = 1; - for (cpu = 0; cpu < nr_parallel; cpu++) + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_parallel; cpu++) { + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) err("pipefd write error"); if (pthread_join(uffd_threads[cpu], (void *)&args[cpu])) @@ -242,26 +259,26 @@ static int stress(struct uffd_args *args) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) { void *area; unsigned long nr; - struct uffd_args args[nr_parallel]; - uint64_t mem_size = nr_pages * page_size; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_parallel); + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); - if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; - if (uffd_test_ctx_init(flags, NULL)) + if (uffd_test_ctx_init(gopts, flags, NULL)) err("context init failed"); - if (posix_memalign(&area, page_size, page_size)) + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) err("out of memory"); zeropage = area; - bzero(zeropage, page_size); + bzero(zeropage, gopts->page_size); pthread_mutex_lock(&uffd_read_mutex); @@ -284,18 +301,18 @@ static int userfaultfd_stress(void) fflush(stdout); if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); /* register */ - if (uffd_register(uffd, area_dst, mem_size, - true, test_uffdio_wp, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure"); - if (area_dst_alias) { - if (uffd_register(uffd, area_dst_alias, mem_size, - true, test_uffdio_wp, false)) + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure alias"); } @@ -323,87 +340,88 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - uffd_stats_reset(args, nr_parallel); + uffd_stats_reset(gopts, args, gopts->nr_parallel); /* bounce pass */ if (stress(args)) { - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 1; } /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); /* unregister */ - if (uffd_unregister(uffd, area_dst, mem_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) err("unregister failure"); - if (area_dst_alias) { - if (uffd_unregister(uffd, area_dst_alias, mem_size)) + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) err("unregister failure alias"); } /* verification */ if (bounces & BOUNCE_VERIFY) - for (nr = 0; nr < nr_pages; nr++) - if (*area_count(area_dst, nr) != count_verify[nr]) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) err("error area_count %llu %llu %lu\n", - *area_count(area_src, nr), - count_verify[nr], nr); + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + swap(gopts->area_src, gopts->area_dst); - swap(area_src_alias, area_dst_alias); + swap(gopts->area_src_alias, gopts->area_dst_alias); - uffd_stats_report(args, nr_parallel); + uffd_stats_report(args, gopts->nr_parallel); } - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 0; } -static void set_test_type(const char *type) +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) { if (!strcmp(type, "anon")) { - test_type = TEST_ANON; + gopts->test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; } else if (!strcmp(type, "hugetlb")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; - map_shared = true; + gopts->map_shared = true; } else if (!strcmp(type, "hugetlb-private")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else if (!strcmp(type, "shmem-private")) { - test_type = TEST_SHMEM; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } } -static void parse_test_type_arg(const char *raw_type) +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) { - set_test_type(raw_type); + set_test_type(gopts, raw_type); - if (!test_type) + if (!gopts->test_type) err("failed to parse test type argument: '%s'", raw_type); - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); else - page_size = sysconf(_SC_PAGE_SIZE); + gopts->page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) + if (!gopts->page_size) err("Unable to determine page size"); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) err("Impossible to run this test"); /* @@ -415,21 +433,21 @@ static void parse_test_type_arg(const char *raw_type) if (uffd_get_features(&features) && errno == ENOENT) ksft_exit_skip("failed to get available features (%d)\n", errno); - test_uffdio_wp = test_uffdio_wp && + gopts->test_uffdio_wp = gopts->test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); - if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - test_uffdio_wp = false; + if (gopts->test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; - close(uffd); - uffd = -1; + close(gopts->uffd); + gopts->uffd = -1; } static void sigalrm(int sig) { if (sig != SIGALRM) abort(); - test_uffdio_copy_eexist = true; + gopts->test_uffdio_copy_eexist = true; alarm(ALARM_INTERVAL_SECS); } @@ -438,6 +456,8 @@ int main(int argc, char **argv) unsigned long nr_cpus; size_t bytes; + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + if (argc < 4) usage(); @@ -445,29 +465,34 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - parse_test_type_arg(argv[1]); + parse_test_type_arg(gopts, argv[1]); bytes = atol(argv[2]) * 1024 * 1024; - if (test_type == TEST_HUGETLB && - get_free_hugepages() < bytes / page_size) { - printf("skip: Skipping userfaultfd... not enough hugepages\n"); - return KSFT_SKIP; - } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus > 32) { /* Don't let calculation below go to zero. */ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", nr_cpus); - nr_parallel = 32; + gopts->nr_parallel = 32; } else { - nr_parallel = nr_cpus; + gopts->nr_parallel = nr_cpus; + } + + /* + * src and dst each require bytes / page_size number of hugepages. + * Ensure nr_parallel - 1 hugepages on top of that to account + * for racy extra reservation of hugepages. + */ + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) { + printf("skip: Skipping userfaultfd... not enough hugepages\n"); + return KSFT_SKIP; } - nr_pages_per_cpu = bytes / page_size / nr_parallel; - if (!nr_pages_per_cpu) { + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", - bytes, page_size, nr_parallel); + bytes, gopts->page_size, gopts->nr_parallel); usage(); } @@ -476,11 +501,11 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_parallel; + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); } #else /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 50501b38e34e..9e3be2ee7f1b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -76,7 +76,7 @@ struct uffd_test_args { typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ -typedef void (*uffd_test_fn)(uffd_test_args_t *); +typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); typedef struct { const char *name; @@ -181,33 +181,6 @@ out: return 1; } -/* - * This function initializes the global variables. TODO: remove global - * vars and then remove this. - */ -static int -uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, - mem_type_t *mem_type, const char **errmsg) -{ - map_shared = mem_type->shared; - uffd_test_ops = mem_type->mem_ops; - uffd_test_case_ops = test->test_case_ops; - - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) - page_size = default_huge_page_size(); - else - page_size = psize(); - - /* Ensure we have at least 2 pages */ - nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; - /* TODO: remove this global var.. it's so ugly */ - nr_parallel = 1; - - /* Initialize test arguments */ - args->mem_type = mem_type; - - return uffd_test_ctx_init(test->uffd_feature_required, errmsg); -} static bool uffd_feature_supported(uffd_test_case_t *test) { @@ -237,7 +210,8 @@ static int pagemap_open(void) } while (0) typedef struct { - int parent_uffd, child_uffd; + uffd_global_test_opts_t *gopts; + int child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) @@ -245,10 +219,10 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; - ready_for_fork = true; + args->gopts->ready_for_fork = true; /* Read until a full msg received */ - while (uffd_read_msg(args->parent_uffd, &msg)); + while (uffd_read_msg(args->gopts, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); @@ -304,9 +278,9 @@ static void unpin_pages(pin_args *args) args->pinned = false; } -static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) +static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) { - fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; + fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; @@ -314,10 +288,10 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - ready_for_fork = false; + gopts->ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ } @@ -328,14 +302,14 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) fd = pagemap_open(); - if (test_pin && pin_pages(&args, area_dst, page_size)) + if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); - value = pagemap_get_entry(fd, area_dst); + value = pagemap_get_entry(fd, gopts->area_dst); /* * After fork(), we should handle uffd-wp bit differently: * @@ -361,70 +335,70 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) return result; } -static void uffd_wp_unpopulated_test(uffd_test_args_t *args) +static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { uint64_t value; int pagemap_fd; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - if (madvise(area_dst, page_size, MADV_DONTNEED)) + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ - *area_dst = 1; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - *(volatile char *)area_dst; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + *(volatile char *)gopts->area_dst; /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } -static void uffd_wp_fork_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; @@ -442,79 +416,80 @@ static void uffd_wp_fork_test_common(uffd_test_args_t *args, * to expose pte markers. */ if (args->mem_type->shared) { - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ - madvise(area_dst, page_size, MADV_PAGEOUT); + madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? "missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ - *area_dst = 2; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 2; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) err("unregister failed"); close(pagemap_fd); } -static void uffd_wp_fork_test(uffd_test_args_t *args) +static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, false); + uffd_wp_fork_test_common(gopts, args, false); } -static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, true); + uffd_wp_fork_test_common(gopts, args, true); } -static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; - if (uffd_register(uffd, area_dst, page_size, false, true, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); /* * 1. First pin, then fork(). This tests fork() special path when * doing early CoW if the page is private. */ - if (pin_pages(&pin_args, area_dst, page_size)) { + if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); - uffd_unregister(uffd, area_dst, page_size); + uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); return; } - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); @@ -527,49 +502,50 @@ static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ - if (pagemap_test_fork(uffd, with_event, true)) { + if (pagemap_test_fork(gopts, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? "missing" : "stall"); goto out; } uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("register failed"); close(pagemap_fd); } -static void uffd_wp_fork_pin_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, false); + uffd_wp_fork_pin_test_common(gopts, args, false); } -static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, true); + uffd_wp_fork_pin_test_common(gopts, args, true); } -static void check_memory_contents(char *p) +static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) { unsigned long i, j; uint8_t expected_byte; - for (i = 0; i < nr_pages; ++i) { + for (i = 0; i < gopts->nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); - for (j = 0; j < page_size; j++) { - uint8_t v = *(uint8_t *)(p + (i * page_size) + j); + for (j = 0; j < gopts->page_size; j++) { + uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } -static void uffd_minor_test_common(bool test_collapse, bool test_wp) +static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing @@ -577,7 +553,7 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) */ assert(!(test_collapse && test_wp)); - if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); @@ -586,9 +562,9 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ - for (p = 0; p < nr_pages; ++p) - memset(area_dst + (p * page_size), p % ((uint8_t)-1), - page_size); + for (p = 0; p < gopts->nr_pages; ++p) + memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), + gopts->page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -600,50 +576,51 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { - if (madvise(area_dst_alias, nr_pages * page_size, + if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } - uffd_test_ops->check_pmd_mapping(area_dst, - nr_pages * page_size / + uffd_test_ops->check_pmd_mapping(gopts, + gopts->area_dst, + gopts->nr_pages * gopts->page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); } - if (args.missing_faults != 0 || args.minor_faults != nr_pages) + if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } -void uffd_minor_test(uffd_test_args_t *args) +void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, false); + uffd_minor_test_common(gopts, false, false); } -void uffd_minor_wp_test(uffd_test_args_t *args) +void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, true); + uffd_minor_test_common(gopts, false, true); } -void uffd_minor_collapse_test(uffd_test_args_t *args) +void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(true, false); + uffd_minor_test_common(gopts, true, false); } static sigjmp_buf jbuf, *sigbuf; @@ -678,7 +655,7 @@ static void sighndl(int sig, siginfo_t *siginfo, void *ptr) * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. */ -static int faulting_process(int signal_test, bool wp) +static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; @@ -687,7 +664,7 @@ static int faulting_process(int signal_test, bool wp) struct sigaction act; volatile unsigned long signalled = 0; - split_nr_pages = (nr_pages + 1) / 2; + split_nr_pages = (gopts->nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; @@ -701,7 +678,7 @@ static int faulting_process(int signal_test, bool wp) for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; - unsigned long offset = nr * page_size; + unsigned long offset = nr * gopts->page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { @@ -713,15 +690,15 @@ static int faulting_process(int signal_test, bool wp) if (steps == 1) { /* This is a MISSING request */ steps++; - if (copy_page(uffd, offset, wp)) + if (copy_page(gopts, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + + wp_range(gopts->uffd, + (__u64)gopts->area_dst + offset, - page_size, false); + gopts->page_size, false); } } else { signalled++; @@ -730,51 +707,53 @@ static int faulting_process(int signal_test, bool wp) } } - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } if (signal_test) return signalled != split_nr_pages; - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) + gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, + gopts->nr_pages * gopts->page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, + gopts->area_src); + if (gopts->area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ - area_src = NULL; + gopts->area_src = NULL; - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { + for (; nr < gopts->nr_pages; nr++) { + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - for (nr = 0; nr < nr_pages; nr++) - for (i = 0; i < page_size; i++) - if (*(area_dst + nr * page_size + i) != 0) + for (nr = 0; nr < gopts->nr_pages; nr++) + for (i = 0; i < gopts->page_size; i++) + if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } -static void uffd_sigbus_test_common(bool wp) +static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) { unsigned long userfaults; pthread_t uffd_mon; @@ -782,25 +761,26 @@ static void uffd_sigbus_test_common(bool wp) int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); - if (faulting_process(1, wp)) + if (faulting_process(gopts, 1, wp)) err("faulting process failed"); - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -808,12 +788,12 @@ static void uffd_sigbus_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(2, wp)); + exit(faulting_process(gopts, 2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); @@ -824,28 +804,29 @@ static void uffd_sigbus_test_common(bool wp) uffd_test_pass(); } -static void uffd_sigbus_test(uffd_test_args_t *args) +static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(false); + uffd_sigbus_test_common(gopts, false); } -static void uffd_sigbus_wp_test(uffd_test_args_t *args) +static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(true); + uffd_sigbus_test_common(gopts, true); } -static void uffd_events_test_common(bool wp) +static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); @@ -853,7 +834,7 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -861,39 +842,39 @@ static void uffd_events_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(0, wp)); + exit(faulting_process(gopts, 0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (args.missing_faults != nr_pages) + if (args.missing_faults != gopts->nr_pages) uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } -static void uffd_events_test(uffd_test_args_t *args) +static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(false); + uffd_events_test_common(gopts, false); } -static void uffd_events_wp_test(uffd_test_args_t *args) +static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(true); + uffd_events_test_common(gopts, true); } -static void retry_uffdio_zeropage(int ufd, +static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, struct uffdio_zeropage *uffdio_zeropage) { - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); @@ -903,16 +884,16 @@ static void retry_uffdio_zeropage(int ufd, } } -static bool do_uffdio_zeropage(int ufd, bool has_zeropage) +static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; - uffdio_zeropage.range.start = (unsigned long) area_dst; - uffdio_zeropage.range.len = page_size; + uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; + uffdio_zeropage.range.len = gopts->page_size; uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ @@ -921,10 +902,10 @@ static bool do_uffdio_zeropage(int ufd, bool has_zeropage) else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { - if (res != page_size) + if (res != gopts->page_size) err("UFFDIO_ZEROPAGE unexpected size"); else - retry_uffdio_zeropage(ufd, &uffdio_zeropage); + retry_uffdio_zeropage(gopts, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); @@ -950,25 +931,29 @@ uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) } /* exercise UFFDIO_ZEROPAGE */ -static void uffd_zeropage_test(uffd_test_args_t *args) +static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { bool has_zeropage; int i; - has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); - if (area_dst_alias) + has_zeropage = uffd_register_detect_zeropage(gopts->uffd, + gopts->area_dst, + gopts->page_size); + if (gopts->area_dst_alias) /* Ignore the retval; we already have it */ - uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); + uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); - if (do_uffdio_zeropage(uffd, has_zeropage)) - for (i = 0; i < page_size; i++) - if (area_dst[i] != 0) + if (do_uffdio_zeropage(gopts, has_zeropage)) + for (i = 0; i < gopts->page_size; i++) + if (gopts->area_dst[i] != 0) err("data non-zero at offset %d\n", i); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); - if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) + if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, + gopts->area_dst_alias, + gopts->page_size)) err("unregister"); uffd_test_pass(); @@ -987,26 +972,27 @@ static void uffd_register_poison(int uffd, void *addr, uint64_t len) err("registered area doesn't support COPY and POISON ioctls"); } -static void do_uffdio_poison(int uffd, unsigned long offset) +static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; - uffdio_poison.range.start = (unsigned long) area_dst + offset; - uffdio_poison.range.len = page_size; + uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; + uffdio_poison.range.len = gopts->page_size; uffdio_poison.mode = 0; - ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); - else if (res != page_size) + else if (res != gopts->page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } -static void uffd_poison_handle_fault( - struct uffd_msg *msg, struct uffd_args *args) +static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -1017,20 +1003,20 @@ static void uffd_poison_handle_fault( (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. */ - if (offset & page_size) - copy_page(uffd, offset, false); + if (offset & gopts->page_size) + copy_page(gopts, offset, false); else - do_uffdio_poison(uffd, offset); + do_uffdio_poison(gopts, offset); } /* Make sure to cover odd/even, and minimum duplications */ #define UFFD_POISON_TEST_NPAGES 4 -static void uffd_poison_test(uffd_test_args_t *targs) +static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { pthread_t uffd_mon; char c; @@ -1039,15 +1025,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) unsigned long nr_sigbus = 0; unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; - if (nr_pages < poison_pages) { - uffd_test_skip("Too few pages for POISON test"); + if (gopts->nr_pages < poison_pages) { + uffd_test_skip("Too less pages for POISON test"); return; } - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + args.gopts = gopts; + + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, poison_pages * page_size); - memset(area_src, 0, poison_pages * page_size); + uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); + memset(gopts->area_src, 0, poison_pages * gopts->page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1060,8 +1048,8 @@ static void uffd_poison_test(uffd_test_args_t *targs) err("sigaction"); for (nr = 0; nr < poison_pages; ++nr) { - unsigned long offset = nr * page_size; - const char *bytes = (const char *) area_dst + offset; + unsigned long offset = nr * gopts->page_size; + const char *bytes = (const char *) gopts->area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { @@ -1074,14 +1062,14 @@ static void uffd_poison_test(uffd_test_args_t *targs) continue; } - for (i = bytes; i < bytes + page_size; ++i) { + for (i = bytes; i < bytes + gopts->page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) at %p: %u", - area_dst, i, *i); + gopts->area_dst, i, *i); } } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); @@ -1094,7 +1082,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) } static void -uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, +uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args, unsigned long len) { unsigned long offset; @@ -1106,28 +1096,32 @@ uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; offset &= ~(len-1); - if (move_page(uffd, offset, len)) + if (move_page(gopts, offset, len)) args->missing_faults++; } -static void uffd_move_handle_fault(struct uffd_msg *msg, +static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, page_size); + uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); } -static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, +static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); + uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); } static void -uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +uffd_move_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *targs, + unsigned long chunk_size, + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, struct uffd_args *args) +) { unsigned long nr; pthread_t uffd_mon; @@ -1139,11 +1133,13 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, unsigned long src_offs = 0; unsigned long dst_offs = 0; + args.gopts = gopts; + /* Prevent source pages from being mapped more than once */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("register failure"); @@ -1151,22 +1147,22 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - step_size = chunk_size / page_size; - step_count = nr_pages / step_size; + step_size = chunk_size / gopts->page_size; + step_count = gopts->nr_pages / step_size; - if (chunk_size > page_size) { - char *aligned_src = ALIGN_UP(area_src, chunk_size); - char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + if (chunk_size > gopts->page_size) { + char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); + char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); - if (aligned_src != area_src || aligned_dst != area_dst) { - src_offs = (aligned_src - area_src) / page_size; - dst_offs = (aligned_dst - area_dst) / page_size; + if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { + src_offs = (aligned_src - gopts->area_src) / gopts->page_size; + dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; step_count--; } - orig_area_src = area_src; - orig_area_dst = area_dst; - area_src = aligned_src; - area_dst = aligned_dst; + orig_area_src = gopts->area_src; + orig_area_dst = gopts->area_dst; + gopts->area_src = aligned_src; + gopts->area_dst = aligned_dst; } /* @@ -1180,34 +1176,34 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, /* Check area_src content */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); - if (count != count_verify[src_offs + nr + i]) + count = *area_count(gopts->area_src, nr + i, gopts); + if (count != gopts->count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", - nr + i, count, count_verify[src_offs + nr + i]); + nr + i, count, gopts->count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { - count = *area_count(area_dst, nr + i); - if (count != count_verify[dst_offs + nr + i]) + count = *area_count(gopts->area_dst, nr + i, gopts); + if (count != gopts->count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[dst_offs + nr + i]); + nr, count, gopts->count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); + count = *area_count(gopts->area_src, nr + i, gopts); if (count != 0) err("nr %lu move failed %llu %llu\n", - nr, count, count_verify[src_offs + nr + i]); + nr, count, gopts->count_verify[src_offs + nr + i]); } } - if (chunk_size > page_size) { - area_src = orig_area_src; - area_dst = orig_area_dst; + if (chunk_size > gopts->page_size) { + gopts->area_src = orig_area_src; + gopts->area_dst = orig_area_dst; } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); @@ -1218,24 +1214,24 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, uffd_test_pass(); } -static void uffd_move_test(uffd_test_args_t *targs) +static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - uffd_move_test_common(targs, page_size, uffd_move_handle_fault); + uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); } -static void uffd_move_pmd_test(uffd_test_args_t *targs) +static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } -static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } @@ -1295,6 +1291,11 @@ typedef enum { THR_STATE_UNINTERRUPTIBLE, } thread_state; +typedef struct { + uffd_global_test_opts_t *gopts; + volatile pid_t *pid; +} mmap_changing_thread_args; + static void sleep_short(void) { usleep(1000); @@ -1337,7 +1338,9 @@ static void thread_state_until(pid_t tid, thread_state state) static void *uffd_mmap_changing_thread(void *opaque) { - volatile pid_t *pid = opaque; + mmap_changing_thread_args *args = opaque; + uffd_global_test_opts_t *gopts = args->gopts; + volatile pid_t *pid = args->pid; int ret; /* Unfortunately, it's only fetch-able from the thread itself.. */ @@ -1345,21 +1348,21 @@ static void *uffd_mmap_changing_thread(void *opaque) *pid = syscall(SYS_gettid); /* Inject an event, this will hang solid until the event read */ - ret = madvise(area_dst, page_size, MADV_REMOVE); + ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); if (ret) err("madvise(MADV_REMOVE) failed"); return NULL; } -static void uffd_consume_message(int fd) +static void uffd_consume_message(uffd_global_test_opts_t *gopts) { struct uffd_msg msg = { 0 }; - while (uffd_read_msg(fd, &msg)); + while (uffd_read_msg(gopts, &msg)); } -static void uffd_mmap_changing_test(uffd_test_args_t *targs) +static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { /* * This stores the real PID (which can be different from how tid is @@ -1368,13 +1371,14 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) pid_t pid = 0; pthread_t tid; int ret; + mmap_changing_thread_args args = { gopts, &pid }; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("uffd_register() failed"); /* Create a thread to generate the racy event */ - ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); if (ret) err("pthread_create() failed"); @@ -1388,26 +1392,26 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) /* Wait until the thread hangs at REMOVE event */ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); - if (!uffdio_mmap_changing_test_copy(uffd)) + if (!uffdio_mmap_changing_test_copy(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_zeropage(uffd)) + if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_move(uffd)) + if (!uffdio_mmap_changing_test_move(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_poison(uffd)) + if (!uffdio_mmap_changing_test_poison(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_continue(uffd)) + if (!uffdio_mmap_changing_test_continue(gopts->uffd)) return; /* * All succeeded above! Recycle everything. Start by reading the * event so as to kick the thread roll again.. */ - uffd_consume_message(uffd); + uffd_consume_message(gopts); ret = pthread_join(tid, NULL); assert(ret == 0); @@ -1415,10 +1419,10 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) uffd_test_pass(); } -static int prevent_hugepages(const char **errmsg) +static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) @@ -1429,10 +1433,10 @@ static int prevent_hugepages(const char **errmsg) return 0; } -static int request_hugepages(const char **errmsg) +static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : @@ -1456,13 +1460,17 @@ struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. */ static void -do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) +do_register_ioctls_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, + bool miss, + bool wp, + bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; - ret = uffd_register_with_ioctls(uffd, area_dst, page_size, + ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, miss, wp, minor, &ioctls); /* @@ -1493,18 +1501,18 @@ do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); } -static void uffd_register_ioctls_test(uffd_test_args_t *args) +static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) - do_register_ioctls_test(args, miss, wp, minor); + do_register_ioctls_test(gopts, args, miss, wp, minor); uffd_test_pass(); } @@ -1742,6 +1750,28 @@ int main(int argc, char *argv[]) } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; + + /* Initialize global test options */ + uffd_global_test_opts_t gopts = { 0 }; + + gopts.map_shared = mem_type->shared; + uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; + + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + gopts.page_size = default_huge_page_size(); + else + gopts.page_size = psize(); + + /* Ensure we have at least 2 pages */ + gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) + / gopts.page_size; + + gopts.nr_parallel = 1; + + /* Initialize test arguments */ + args.mem_type = mem_type; + if (!(test->mem_targets & mem_type->mem_flag)) continue; @@ -1756,13 +1786,12 @@ int main(int argc, char *argv[]) uffd_test_skip("feature missing"); continue; } - if (uffd_setup_environment(&args, test, mem_type, - &errmsg)) { + if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { uffd_test_skip(errmsg); continue; } - test->uffd_fn(&args); - uffd_test_ctx_clear(); + test->uffd_fn(&gopts, &args); + uffd_test_ctx_clear(&gopts); } } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index c2ba7d46c7b4..4e4a591cf527 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -19,11 +19,6 @@ static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -87,9 +82,9 @@ static void *alloc_one_folio(size_t size, bool private, bool hugetlb) struct thp_settings settings = *thp_current_settings(); if (private) - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; else - settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + settings.shmem_hugepages[sz2ord(size, pagesize)].enabled = SHMEM_ALWAYS; thp_push_settings(&settings); @@ -157,7 +152,8 @@ static bool range_is_swapped(void *addr, size_t size) return true; } -static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +static void test_one_folio(uffd_global_test_opts_t *gopts, size_t size, bool private, + bool swapout, bool hugetlb) { struct uffdio_writeprotect wp_prms; uint64_t features = 0; @@ -181,21 +177,21 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb } /* Register range for uffd-wp. */ - if (userfaultfd_open(&features)) { + if (userfaultfd_open(gopts, &features)) { if (errno == ENOENT) ksft_test_result_skip("userfaultfd not available\n"); else ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } - if (uffd_register(uffd, mem, size, false, true, false)) { + if (uffd_register(gopts->uffd, mem, size, false, true, false)) { ksft_test_result_fail("uffd_register() failed\n"); goto out; } wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; wp_prms.range.start = (uintptr_t)mem; wp_prms.range.len = size; - if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); goto out; } @@ -242,9 +238,9 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb out: if (mem) munmap(mem, size); - if (uffd >= 0) { - close(uffd); - uffd = -1; + if (gopts->uffd >= 0) { + close(gopts->uffd); + gopts->uffd = -1; } } @@ -336,6 +332,7 @@ static const struct testcase testcases[] = { int main(int argc, char **argv) { + uffd_global_test_opts_t gopts = { 0 }; struct thp_settings settings; int i, j, plan = 0; @@ -367,8 +364,8 @@ int main(int argc, char **argv) const struct testcase *tc = &testcases[i]; for (j = 0; j < *tc->nr_sizes; j++) - test_one_folio(tc->sizes[j], tc->private, tc->swapout, - tc->hugetlb); + test_one_folio(&gopts, tc->sizes[j], tc->private, + tc->swapout, tc->hugetlb); } /* If THP is supported, restore original THP settings. */ diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 896b3f73fc53..306eba825107 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -230,10 +230,10 @@ void testcases_init(void) .msg = "mmap(-1, MAP_HUGETLB) again", }, { - .addr = (void *)(addr_switch_hint - pagesize), + .addr = (void *)(addr_switch_hint - hugepagesize), .size = 2 * hugepagesize, .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .msg = "mmap(addr_switch_hint - hugepagesize, 2*hugepagesize, MAP_HUGETLB)", .low_addr_required = 1, .keep_mapped = 1, }, diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 325de53966b6..a7d4b02b21dd 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -9,6 +9,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +orig_nr_hugepages=0 skip() { @@ -76,5 +77,41 @@ check_test_requirements() esac } +save_nr_hugepages() +{ + orig_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages) +} + +restore_nr_hugepages() +{ + echo "$orig_nr_hugepages" > /proc/sys/vm/nr_hugepages +} + +setup_nr_hugepages() +{ + local needpgs=$1 + while read -r name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs="$size" + break + fi + done < /proc/meminfo + if [ "$freepgs" -ge "$needpgs" ]; then + return + fi + local hpgs=$((orig_nr_hugepages + needpgs)) + echo $hpgs > /proc/sys/vm/nr_hugepages + + local nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + if [ "$nr_hugepgs" != "$hpgs" ]; then + restore_nr_hugepages + skip "$0: no enough hugepages for testing" + fi +} + check_test_requirements +save_nr_hugepages +# 4 keep_mapped pages, and one for tmp usage +setup_nr_hugepages 5 ./va_high_addr_switch --run-hugetlb +restore_nr_hugepages diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 169dbd692bf5..81b33d8f78f4 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -44,12 +44,18 @@ * On Arm64 the address space is 256TB and support for * high mappings up to 4PB virtual address space has * been added. + * + * On PowerPC64, the address space up to 128TB can be + * mapped without a hint. Addresses beyond 128TB, up to + * 4PB, can be mapped with a hint. + * */ #define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */ #define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL) #define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL) #define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL) +#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL) #define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ #define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */ @@ -59,6 +65,11 @@ #define HIGH_ADDR_SHIFT 49 #define NR_CHUNKS_LOW NR_CHUNKS_256TB #define NR_CHUNKS_HIGH NR_CHUNKS_3840TB +#elif defined(__PPC64__) +#define HIGH_ADDR_MARK ADDR_MARK_128TB +#define HIGH_ADDR_SHIFT 48 +#define NR_CHUNKS_LOW NR_CHUNKS_128TB +#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB #else #define HIGH_ADDR_MARK ADDR_MARK_128TB #define HIGH_ADDR_SHIFT 48 @@ -227,7 +238,7 @@ int main(int argc, char *argv[]) if (hptr[i] == MAP_FAILED) break; - mark_range(ptr[i], MAP_CHUNK_SIZE); + mark_range(hptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 9dafa7669ef9..e33cda301dad 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -338,6 +338,19 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max) return count; } +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags) +{ + size_t count; + + count = pread(kpageflags_fd, flags, sizeof(*flags), + pfn * sizeof(*flags)); + + if (count != sizeof(*flags)) + return -1; + + return 0; +} + /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls) @@ -402,7 +415,7 @@ unsigned long get_free_hugepages(void) return fhp; } -bool check_vmflag_io(void *addr) +static bool check_vmflag(void *addr, const char *flag) { char buffer[MAX_LINE_LENGTH]; const char *flags; @@ -419,13 +432,40 @@ bool check_vmflag_io(void *addr) if (!flaglen) return false; - if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen)) return true; flags += flaglen; } } +bool check_vmflag_io(void *addr) +{ + return check_vmflag(addr, "io"); +} + +bool check_vmflag_pfnmap(void *addr) +{ + return check_vmflag(addr, "pf"); +} + +bool softdirty_supported(void) +{ + char *addr; + bool supported = false; + const size_t pagesize = getpagesize(); + + /* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */ + addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (!addr) + ksft_exit_fail_msg("mmap failed\n"); + + supported = check_vmflag(addr, "sd"); + munmap(addr, pagesize); + return supported; +} + /* * Open an fd at /proc/$pid/maps and configure procmap_out ready for * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. @@ -555,3 +595,126 @@ bool detect_huge_zeropage(void) close(fd); return enabled; } + +long ksm_get_self_zero_pages(void) +{ + int proc_self_ksm_stat_fd; + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + if (proc_self_ksm_stat_fd < 0) + return -errno; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_stat_fd); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + return strtol(substr_ksm_zero + value_pos, NULL, 10); +} + +long ksm_get_self_merging_pages(void) +{ + int proc_self_ksm_merging_pages_fd; + char buf[10]; + ssize_t ret; + + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + if (proc_self_ksm_merging_pages_fd < 0) + return -errno; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_merging_pages_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +long ksm_get_full_scans(void) +{ + int ksm_full_scans_fd; + char buf[10]; + ssize_t ret; + + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + return -errno; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + close(ksm_full_scans_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +int ksm_use_zero_pages(void) +{ + int ksm_use_zero_pages_fd; + ssize_t ret; + + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_use_zero_pages_fd < 0) + return -errno; + + ret = write(ksm_use_zero_pages_fd, "1", 1); + close(ksm_use_zero_pages_fd); + return ret == 1 ? 0 : -errno; +} + +int ksm_start(void) +{ + int ksm_fd; + ssize_t ret; + long start_scans, end_scans; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) { + close(ksm_fd); + return start_scans; + } + ret = write(ksm_fd, "1", 1); + close(ksm_fd); + if (ret != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +int ksm_stop(void) +{ + int ksm_fd; + ssize_t ret; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + ret = write(ksm_fd, "2", 1); + close(ksm_fd); + return ret == 1 ? 0 : -errno; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b55d1809debc..26c30fdc0241 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,9 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +#define KPF_COMPOUND_HEAD BIT_ULL(15) +#define KPF_COMPOUND_TAIL BIT_ULL(16) +#define KPF_THP BIT_ULL(22) /* * Ignore the checkpatch warning, we must read from x but don't want to do * anything with it in order to trigger a read page fault. We therefore must use @@ -85,6 +88,7 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); int64_t allocate_transhuge(void *ptr, int pagemap_fd); unsigned long default_huge_page_size(void); int detect_hugetlb_page_sizes(size_t sizes[], int max); +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags); int uffd_register(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor); @@ -93,12 +97,14 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); bool check_vmflag_io(void *addr); +bool check_vmflag_pfnmap(void *addr); int open_procmap(pid_t pid, struct procmap_fd *procmap_out); int query_procmap(struct procmap_fd *procmap); bool find_vma_procmap(struct procmap_fd *procmap, void *address); int close_procmap(struct procmap_fd *procmap); int write_sysfs(const char *file_path, unsigned long val); int read_sysfs(const char *file_path, unsigned long *val); +bool softdirty_supported(void); static inline int open_self_procmap(struct procmap_fd *procmap_out) { @@ -126,9 +132,21 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +static inline int sz2ord(size_t size, size_t pagesize) +{ + return __builtin_ctzll(size / pagesize); +} + void *sys_mremap(void *old_address, unsigned long old_size, unsigned long new_size, int flags, void *new_address); +long ksm_get_self_zero_pages(void); +long ksm_get_self_merging_pages(void); +long ksm_get_full_scans(void); +int ksm_use_zero_pages(void); +int ksm_start(void); +int ksm_stop(void); + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore new file mode 100644 index 000000000000..ccfb40837a73 --- /dev/null +++ b/tools/testing/selftests/namespaces/.gitignore @@ -0,0 +1,3 @@ +nsid_test +file_handle_test +init_ino_test diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile new file mode 100644 index 000000000000..5fe4b3dc07d3 --- /dev/null +++ b/tools/testing/selftests/namespaces/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) + +TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test + +include ../lib.mk + diff --git a/tools/testing/selftests/namespaces/config b/tools/testing/selftests/namespaces/config new file mode 100644 index 000000000000..d09836260262 --- /dev/null +++ b/tools/testing/selftests/namespaces/config @@ -0,0 +1,7 @@ +CONFIG_UTS_NS=y +CONFIG_TIME_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y +CONFIG_CGROUPS=y diff --git a/tools/testing/selftests/namespaces/file_handle_test.c b/tools/testing/selftests/namespaces/file_handle_test.c new file mode 100644 index 000000000000..f1bc5773f552 --- /dev/null +++ b/tools/testing/selftests/namespaces/file_handle_test.c @@ -0,0 +1,1429 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <linux/unistd.h> +#include "../kselftest_harness.h" + +#ifndef FD_NSFS_ROOT +#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ +#endif + +TEST(nsfs_net_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open a namespace file descriptor */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT as unprivileged user */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + if (fd < 0 && errno == EPERM) { + SKIP(free(handle); close(ns_fd); + return, + "Permission denied for unprivileged user (expected)"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_uts_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open UTS namespace file descriptor */ + ns_fd = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_ipc_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open IPC namespace file descriptor */ + ns_fd = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_pid_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open PID namespace file descriptor */ + ns_fd = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_mnt_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open mount namespace file descriptor */ + ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_user_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open user namespace file descriptor */ + ns_fd = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_cgroup_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open cgroup namespace file descriptor */ + ns_fd = open("/proc/self/ns/cgroup", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); return, "cgroup namespace not available"); + } + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_time_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open time namespace file descriptor */ + ns_fd = open("/proc/self/ns/time", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); return, "time namespace not available"); + } + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_user_net_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current network namespace */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new network namespace */ + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create network namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's network namespace handle from new user+net namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new network namespace"); + } + + /* Should fail with permission denied since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_uts_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current UTS namespace */ + ns_fd = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new UTS namespace */ + ret = unshare(CLONE_NEWUTS); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create UTS namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's UTS namespace handle from new user+uts namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new UTS namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_ipc_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current IPC namespace */ + ns_fd = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new IPC namespace */ + ret = unshare(CLONE_NEWIPC); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create IPC namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's IPC namespace handle from new user+ipc namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new IPC namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_mnt_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current mount namespace */ + ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new mount namespace */ + ret = unshare(CLONE_NEWNS); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create mount namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's mount namespace handle from new user+mnt namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new mount namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_cgroup_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current cgroup namespace */ + ns_fd = open("/proc/self/ns/cgroup", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); close(pipefd[0]); close(pipefd[1]); + return, "cgroup namespace not available"); + } + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new cgroup namespace */ + ret = unshare(CLONE_NEWCGROUP); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create cgroup namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's cgroup namespace handle from new user+cgroup namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new cgroup namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_pid_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current PID namespace */ + ns_fd = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new PID namespace - requires fork to take effect */ + ret = unshare(CLONE_NEWPID); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create PID namespace */ + close(pipefd[1]); + exit(0); + } + + /* Fork again for PID namespace to take effect */ + pid_t child_pid = fork(); + if (child_pid < 0) { + write(pipefd[1], "N", + 1); /* Unable to fork in PID namespace */ + close(pipefd[1]); + exit(0); + } + + if (child_pid == 0) { + /* Grandchild in new PID namespace */ + /* Try to open parent's PID namespace handle from new user+pid namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", + 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Wait for grandchild */ + waitpid(child_pid, NULL, 0); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new PID namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_time_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current time namespace */ + ns_fd = open("/proc/self/ns/time", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); close(pipefd[0]); close(pipefd[1]); + return, "time namespace not available"); + } + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new time namespace - requires fork to take effect */ + ret = unshare(CLONE_NEWTIME); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create time namespace */ + close(pipefd[1]); + exit(0); + } + + /* Fork again for time namespace to take effect */ + pid_t child_pid = fork(); + if (child_pid < 0) { + write(pipefd[1], "N", + 1); /* Unable to fork in time namespace */ + close(pipefd[1]); + exit(0); + } + + if (child_pid == 0) { + /* Grandchild in new time namespace */ + /* Try to open parent's time namespace handle from new user+time namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", + 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Wait for grandchild */ + waitpid(child_pid, NULL, 0); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new time namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_open_flags) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open a namespace file descriptor */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Test invalid flags that should fail */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_WRONLY); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDWR); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TRUNC); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECT); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EINVAL); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TMPFILE); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EINVAL); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECTORY); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, ENOTDIR); + + close(ns_fd); + free(handle); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/namespaces/init_ino_test.c b/tools/testing/selftests/namespaces/init_ino_test.c new file mode 100644 index 000000000000..5b6993c3740b --- /dev/null +++ b/tools/testing/selftests/namespaces/init_ino_test.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <linux/nsfs.h> + +#include "../kselftest_harness.h" + +struct ns_info { + const char *name; + const char *proc_path; + unsigned int expected_ino; +}; + +static struct ns_info namespaces[] = { + { "ipc", "/proc/1/ns/ipc", IPC_NS_INIT_INO }, + { "uts", "/proc/1/ns/uts", UTS_NS_INIT_INO }, + { "user", "/proc/1/ns/user", USER_NS_INIT_INO }, + { "pid", "/proc/1/ns/pid", PID_NS_INIT_INO }, + { "cgroup", "/proc/1/ns/cgroup", CGROUP_NS_INIT_INO }, + { "time", "/proc/1/ns/time", TIME_NS_INIT_INO }, + { "net", "/proc/1/ns/net", NET_NS_INIT_INO }, + { "mnt", "/proc/1/ns/mnt", MNT_NS_INIT_INO }, +}; + +TEST(init_namespace_inodes) +{ + struct stat st; + + for (int i = 0; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) { + int ret = stat(namespaces[i].proc_path, &st); + + /* Some namespaces might not be available (e.g., time namespace on older kernels) */ + if (ret < 0) { + if (errno == ENOENT) { + ksft_test_result_skip("%s namespace not available\n", + namespaces[i].name); + continue; + } + ASSERT_GE(ret, 0) + TH_LOG("Failed to stat %s: %s", + namespaces[i].proc_path, strerror(errno)); + } + + ASSERT_EQ(st.st_ino, namespaces[i].expected_ino) + TH_LOG("Namespace %s has inode 0x%lx, expected 0x%x", + namespaces[i].name, st.st_ino, namespaces[i].expected_ino); + + ksft_print_msg("Namespace %s: inode 0x%lx matches expected 0x%x\n", + namespaces[i].name, st.st_ino, namespaces[i].expected_ino); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c new file mode 100644 index 000000000000..e28accd74a57 --- /dev/null +++ b/tools/testing/selftests/namespaces/nsid_test.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <libgen.h> +#include <limits.h> +#include <pthread.h> +#include <string.h> +#include <sys/mount.h> +#include <poll.h> +#include <sys/epoll.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> +#include <linux/fs.h> +#include <linux/limits.h> +#include <linux/nsfs.h> +#include "../kselftest_harness.h" + +TEST(nsid_mntns_basic) +{ + __u64 mnt_ns_id = 0; + int fd_mntns; + int ret; + + /* Open the current mount namespace */ + fd_mntns = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(fd_mntns, 0); + + /* Get the mount namespace ID */ + ret = ioctl(fd_mntns, NS_GET_MNTNS_ID, &mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(mnt_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 mnt_ns_id2 = 0; + ret = ioctl(fd_mntns, NS_GET_ID, &mnt_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mnt_ns_id, mnt_ns_id2); + + close(fd_mntns); +} + +TEST(nsid_mntns_separate) +{ + __u64 parent_mnt_ns_id = 0; + __u64 child_mnt_ns_id = 0; + int fd_parent_mntns, fd_child_mntns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's mount namespace ID */ + fd_parent_mntns = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(fd_parent_mntns, 0); + ret = ioctl(fd_parent_mntns, NS_GET_ID, &parent_mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_mnt_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new mount namespace */ + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_mntns); + SKIP(return, "No permission to create mount namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's mount namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid); + fd_child_mntns = open(path, O_RDONLY); + ASSERT_GE(fd_child_mntns, 0); + + /* Get child's mount namespace ID */ + ret = ioctl(fd_child_mntns, NS_GET_ID, &child_mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_mnt_ns_id, 0); + + /* Parent and child should have different mount namespace IDs */ + ASSERT_NE(parent_mnt_ns_id, child_mnt_ns_id); + + close(fd_parent_mntns); + close(fd_child_mntns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_cgroupns_basic) +{ + __u64 cgroup_ns_id = 0; + int fd_cgroupns; + int ret; + + /* Open the current cgroup namespace */ + fd_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY); + ASSERT_GE(fd_cgroupns, 0); + + /* Get the cgroup namespace ID */ + ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(cgroup_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 cgroup_ns_id2 = 0; + ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cgroup_ns_id, cgroup_ns_id2); + + close(fd_cgroupns); +} + +TEST(nsid_cgroupns_separate) +{ + __u64 parent_cgroup_ns_id = 0; + __u64 child_cgroup_ns_id = 0; + int fd_parent_cgroupns, fd_child_cgroupns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's cgroup namespace ID */ + fd_parent_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY); + ASSERT_GE(fd_parent_cgroupns, 0); + ret = ioctl(fd_parent_cgroupns, NS_GET_ID, &parent_cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_cgroup_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new cgroup namespace */ + ret = unshare(CLONE_NEWCGROUP); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_cgroupns); + SKIP(return, "No permission to create cgroup namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's cgroup namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/cgroup", pid); + fd_child_cgroupns = open(path, O_RDONLY); + ASSERT_GE(fd_child_cgroupns, 0); + + /* Get child's cgroup namespace ID */ + ret = ioctl(fd_child_cgroupns, NS_GET_ID, &child_cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_cgroup_ns_id, 0); + + /* Parent and child should have different cgroup namespace IDs */ + ASSERT_NE(parent_cgroup_ns_id, child_cgroup_ns_id); + + close(fd_parent_cgroupns); + close(fd_child_cgroupns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_ipcns_basic) +{ + __u64 ipc_ns_id = 0; + int fd_ipcns; + int ret; + + /* Open the current IPC namespace */ + fd_ipcns = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(fd_ipcns, 0); + + /* Get the IPC namespace ID */ + ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(ipc_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 ipc_ns_id2 = 0; + ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(ipc_ns_id, ipc_ns_id2); + + close(fd_ipcns); +} + +TEST(nsid_ipcns_separate) +{ + __u64 parent_ipc_ns_id = 0; + __u64 child_ipc_ns_id = 0; + int fd_parent_ipcns, fd_child_ipcns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's IPC namespace ID */ + fd_parent_ipcns = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(fd_parent_ipcns, 0); + ret = ioctl(fd_parent_ipcns, NS_GET_ID, &parent_ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_ipc_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new IPC namespace */ + ret = unshare(CLONE_NEWIPC); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_ipcns); + SKIP(return, "No permission to create IPC namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's IPC namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/ipc", pid); + fd_child_ipcns = open(path, O_RDONLY); + ASSERT_GE(fd_child_ipcns, 0); + + /* Get child's IPC namespace ID */ + ret = ioctl(fd_child_ipcns, NS_GET_ID, &child_ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_ipc_ns_id, 0); + + /* Parent and child should have different IPC namespace IDs */ + ASSERT_NE(parent_ipc_ns_id, child_ipc_ns_id); + + close(fd_parent_ipcns); + close(fd_child_ipcns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_utsns_basic) +{ + __u64 uts_ns_id = 0; + int fd_utsns; + int ret; + + /* Open the current UTS namespace */ + fd_utsns = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(fd_utsns, 0); + + /* Get the UTS namespace ID */ + ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(uts_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 uts_ns_id2 = 0; + ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(uts_ns_id, uts_ns_id2); + + close(fd_utsns); +} + +TEST(nsid_utsns_separate) +{ + __u64 parent_uts_ns_id = 0; + __u64 child_uts_ns_id = 0; + int fd_parent_utsns, fd_child_utsns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's UTS namespace ID */ + fd_parent_utsns = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(fd_parent_utsns, 0); + ret = ioctl(fd_parent_utsns, NS_GET_ID, &parent_uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_uts_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new UTS namespace */ + ret = unshare(CLONE_NEWUTS); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_utsns); + SKIP(return, "No permission to create UTS namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's UTS namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid); + fd_child_utsns = open(path, O_RDONLY); + ASSERT_GE(fd_child_utsns, 0); + + /* Get child's UTS namespace ID */ + ret = ioctl(fd_child_utsns, NS_GET_ID, &child_uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_uts_ns_id, 0); + + /* Parent and child should have different UTS namespace IDs */ + ASSERT_NE(parent_uts_ns_id, child_uts_ns_id); + + close(fd_parent_utsns); + close(fd_child_utsns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_userns_basic) +{ + __u64 user_ns_id = 0; + int fd_userns; + int ret; + + /* Open the current user namespace */ + fd_userns = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(fd_userns, 0); + + /* Get the user namespace ID */ + ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(user_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 user_ns_id2 = 0; + ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(user_ns_id, user_ns_id2); + + close(fd_userns); +} + +TEST(nsid_userns_separate) +{ + __u64 parent_user_ns_id = 0; + __u64 child_user_ns_id = 0; + int fd_parent_userns, fd_child_userns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's user namespace ID */ + fd_parent_userns = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(fd_parent_userns, 0); + ret = ioctl(fd_parent_userns, NS_GET_ID, &parent_user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_user_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new user namespace */ + ret = unshare(CLONE_NEWUSER); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_userns); + SKIP(return, "No permission to create user namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's user namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + fd_child_userns = open(path, O_RDONLY); + ASSERT_GE(fd_child_userns, 0); + + /* Get child's user namespace ID */ + ret = ioctl(fd_child_userns, NS_GET_ID, &child_user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_user_ns_id, 0); + + /* Parent and child should have different user namespace IDs */ + ASSERT_NE(parent_user_ns_id, child_user_ns_id); + + close(fd_parent_userns); + close(fd_child_userns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_timens_basic) +{ + __u64 time_ns_id = 0; + int fd_timens; + int ret; + + /* Open the current time namespace */ + fd_timens = open("/proc/self/ns/time", O_RDONLY); + if (fd_timens < 0) { + SKIP(return, "Time namespaces not supported"); + } + + /* Get the time namespace ID */ + ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(time_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 time_ns_id2 = 0; + ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(time_ns_id, time_ns_id2); + + close(fd_timens); +} + +TEST(nsid_timens_separate) +{ + __u64 parent_time_ns_id = 0; + __u64 child_time_ns_id = 0; + int fd_parent_timens, fd_child_timens; + int ret; + pid_t pid; + int pipefd[2]; + + /* Open the current time namespace */ + fd_parent_timens = open("/proc/self/ns/time", O_RDONLY); + if (fd_parent_timens < 0) { + SKIP(return, "Time namespaces not supported"); + } + + /* Get parent's time namespace ID */ + ret = ioctl(fd_parent_timens, NS_GET_ID, &parent_time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_time_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new time namespace */ + ret = unshare(CLONE_NEWTIME); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES || errno == EINVAL) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Fork a grandchild to actually enter the new namespace */ + pid_t grandchild = fork(); + if (grandchild == 0) { + /* Grandchild is in the new namespace */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + pause(); + _exit(0); + } else if (grandchild > 0) { + /* Child writes grandchild PID and waits */ + write(pipefd[1], "Y", 1); + write(pipefd[1], &grandchild, sizeof(grandchild)); + close(pipefd[1]); + pause(); /* Keep the parent alive to maintain the grandchild */ + _exit(0); + } else { + _exit(1); + } + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_timens); + close(pipefd[0]); + SKIP(return, "Cannot create time namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + pid_t grandchild_pid; + ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + close(pipefd[0]); + + /* Open grandchild's time namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/time", grandchild_pid); + fd_child_timens = open(path, O_RDONLY); + ASSERT_GE(fd_child_timens, 0); + + /* Get child's time namespace ID */ + ret = ioctl(fd_child_timens, NS_GET_ID, &child_time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_time_ns_id, 0); + + /* Parent and child should have different time namespace IDs */ + ASSERT_NE(parent_time_ns_id, child_time_ns_id); + + close(fd_parent_timens); + close(fd_child_timens); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_pidns_basic) +{ + __u64 pid_ns_id = 0; + int fd_pidns; + int ret; + + /* Open the current PID namespace */ + fd_pidns = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(fd_pidns, 0); + + /* Get the PID namespace ID */ + ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(pid_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 pid_ns_id2 = 0; + ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(pid_ns_id, pid_ns_id2); + + close(fd_pidns); +} + +TEST(nsid_pidns_separate) +{ + __u64 parent_pid_ns_id = 0; + __u64 child_pid_ns_id = 0; + int fd_parent_pidns, fd_child_pidns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's PID namespace ID */ + fd_parent_pidns = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(fd_parent_pidns, 0); + ret = ioctl(fd_parent_pidns, NS_GET_ID, &parent_pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_pid_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new PID namespace */ + ret = unshare(CLONE_NEWPID); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Fork a grandchild to actually enter the new namespace */ + pid_t grandchild = fork(); + if (grandchild == 0) { + /* Grandchild is in the new namespace */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + pause(); + _exit(0); + } else if (grandchild > 0) { + /* Child writes grandchild PID and waits */ + write(pipefd[1], "Y", 1); + write(pipefd[1], &grandchild, sizeof(grandchild)); + close(pipefd[1]); + pause(); /* Keep the parent alive to maintain the grandchild */ + _exit(0); + } else { + _exit(1); + } + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_pidns); + close(pipefd[0]); + SKIP(return, "No permission to create PID namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + pid_t grandchild_pid; + ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + close(pipefd[0]); + + /* Open grandchild's PID namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/pid", grandchild_pid); + fd_child_pidns = open(path, O_RDONLY); + ASSERT_GE(fd_child_pidns, 0); + + /* Get child's PID namespace ID */ + ret = ioctl(fd_child_pidns, NS_GET_ID, &child_pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_pid_ns_id, 0); + + /* Parent and child should have different PID namespace IDs */ + ASSERT_NE(parent_pid_ns_id, child_pid_ns_id); + + close(fd_parent_pidns); + close(fd_child_pidns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_netns_basic) +{ + __u64 net_ns_id = 0; + __u64 netns_cookie = 0; + int fd_netns; + int sock; + socklen_t optlen; + int ret; + + /* Open the current network namespace */ + fd_netns = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(fd_netns, 0); + + /* Get the network namespace ID via ioctl */ + ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(net_ns_id, 0); + + /* Create a socket to get the SO_NETNS_COOKIE */ + sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(sock, 0); + + /* Get the network namespace cookie via socket option */ + optlen = sizeof(netns_cookie); + ret = getsockopt(sock, SOL_SOCKET, SO_NETNS_COOKIE, &netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + ASSERT_EQ(optlen, sizeof(netns_cookie)); + + /* The namespace ID and cookie should be identical */ + ASSERT_EQ(net_ns_id, netns_cookie); + + /* Verify we can get the same ID again */ + __u64 net_ns_id2 = 0; + ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(net_ns_id, net_ns_id2); + + close(sock); + close(fd_netns); +} + +TEST(nsid_netns_separate) +{ + __u64 parent_net_ns_id = 0; + __u64 parent_netns_cookie = 0; + __u64 child_net_ns_id = 0; + __u64 child_netns_cookie = 0; + int fd_parent_netns, fd_child_netns; + int parent_sock, child_sock; + socklen_t optlen; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's network namespace ID */ + fd_parent_netns = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(fd_parent_netns, 0); + ret = ioctl(fd_parent_netns, NS_GET_ID, &parent_net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_net_ns_id, 0); + + /* Get parent's network namespace cookie */ + parent_sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(parent_sock, 0); + optlen = sizeof(parent_netns_cookie); + ret = getsockopt(parent_sock, SOL_SOCKET, SO_NETNS_COOKIE, &parent_netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + + /* Verify parent's ID and cookie match */ + ASSERT_EQ(parent_net_ns_id, parent_netns_cookie); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new network namespace */ + ret = unshare(CLONE_NEWNET); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_netns); + close(parent_sock); + SKIP(return, "No permission to create network namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's network namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/net", pid); + fd_child_netns = open(path, O_RDONLY); + ASSERT_GE(fd_child_netns, 0); + + /* Get child's network namespace ID */ + ret = ioctl(fd_child_netns, NS_GET_ID, &child_net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_net_ns_id, 0); + + /* Create socket in child's namespace to get cookie */ + ret = setns(fd_child_netns, CLONE_NEWNET); + if (ret == 0) { + child_sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(child_sock, 0); + + optlen = sizeof(child_netns_cookie); + ret = getsockopt(child_sock, SOL_SOCKET, SO_NETNS_COOKIE, &child_netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + + /* Verify child's ID and cookie match */ + ASSERT_EQ(child_net_ns_id, child_netns_cookie); + + close(child_sock); + + /* Return to parent namespace */ + setns(fd_parent_netns, CLONE_NEWNET); + } + + /* Parent and child should have different network namespace IDs */ + ASSERT_NE(parent_net_ns_id, child_net_ns_id); + if (child_netns_cookie != 0) { + ASSERT_NE(parent_netns_cookie, child_netns_cookie); + } + + close(fd_parent_netns); + close(fd_child_netns); + close(parent_sock); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..439101b518ee 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy @@ -51,6 +52,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tcp_port_share tfo timestamping tls diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b31a71f2b372..5d9d96515c4a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -8,11 +8,12 @@ CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ rtnetlink.sh xfrm_policy.sh +TEST_PROGS += fcnal-ipv4.sh fcnal-ipv6.sh fcnal-other.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh +TEST_PROGS += tcp_fastopen_backup_key.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh @@ -99,6 +100,7 @@ TEST_GEN_PROGS += bind_wildcard TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nh.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += test_neigh.sh @@ -115,14 +117,21 @@ TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_ether_dst.sh TEST_PROGS += broadcast_pmtu.sh TEST_PROGS += ipv6_force_forwarding.sh +TEST_GEN_PROGS += ipv6_fragmentation +TEST_PROGS += route_hint.sh +TEST_GEN_PROGS += tcp_port_share # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := busy_poller netlink-dumps +YNL_GEN_FILES := busy_poller +YNL_GEN_PROGS := netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_PROGS += $(YNL_GEN_PROGS) TEST_FILES := settings +TEST_FILES += fcnal-test.sh TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a4b61c6d0290..0a20c98bbcfd 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ -CFLAGS += $(KHDR_INCLUDES) +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 9d22561e7b8f..fc467714387e 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -11,11 +11,6 @@ #define NR_CHUNKS 100 #define MSG_LEN 256 -struct scm_inq { - struct cmsghdr cmsghdr; - int inq; -}; - FIXTURE(scm_inq) { int fd[2]; @@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata, static void recv_chunks(struct __test_metadata *_metadata, FIXTURE_DATA(scm_inq) *self) { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; struct msghdr msg = {}; struct iovec iov = {}; - struct scm_inq cmsg; + struct cmsghdr *cmsg; char buf[MSG_LEN]; int i, ret; int inq; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = &cmsg; - msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); iov.iov_base = buf; iov.iov_len = sizeof(buf); for (i = 0; i < NR_CHUNKS; i++) { memset(buf, 0, sizeof(buf)); - memset(&cmsg, 0, sizeof(cmsg)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); ret = recvmsg(self->fd[1], &msg, 0); ASSERT_EQ(MSG_LEN, ret); - ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); - ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); - ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); - ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); ret = ioctl(self->fd[1], SIOCINQ, &inq); ASSERT_EQ(0, ret); - ASSERT_EQ(cmsg.inq, inq); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); } } diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 37e034874034..ef2921988e5f 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -137,7 +137,6 @@ struct cmsg_data { static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) { struct cmsghdr *cmsg; - int data = 0; if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { log_err("recvmsg: truncated"); @@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid) int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - pid_t client_pid; struct pidfd_info info = { .mask = PIDFD_INFO_EXIT, }; diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 8b015f16c03d..914f99d153ce 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); if (variant->disabled) { diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index b2c271b79240..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh new file mode 100755 index 000000000000..334a7eca8a80 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_ether_dst.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Brett A C Sheffield <bacs@librecast.net> +# Author: Oscar Maes <oscmaes92@gmail.com> +# +# Ensure destination ethernet field is correctly set for +# broadcast packets + +source lib.sh + +CLIENT_IP4="192.168.0.1" +GW_IP4="192.168.0.2" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth \ + peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0 + + ip -net "${SERVER_NS}" link set link1 up + + ip -net "${CLIENT_NS}" route add default via "${GW_IP4}" + ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55 +} + +cleanup() { + rm -f "${CAPFILE}" "${OUTPUT}" + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +test_broadcast_ether_dst() { + local rc=0 + CAPFILE=$(mktemp -u cap.XXXXXXXXXX) + OUTPUT=$(mktemp -u out.XXXXXXXXXX) + + echo "Testing ethernet broadcast destination" + + # start tcpdump listening for icmp + # tcpdump will exit after receiving a single packet + # timeout will kill tcpdump if it is still running after 2s + timeout 2s ip netns exec "${CLIENT_NS}" \ + tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" & + pid=$! + slowwait 1 grep -qs "listening" "${OUTPUT}" + + # send broadcast ping + ip netns exec "${CLIENT_NS}" \ + ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null + + # wait for tcpdump for exit after receiving packet + wait "${pid}" + + # compare ethernet destination field to ff:ff:ff:ff:ff:ff + ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \ + awk '{sub(/,/,"",$3); print $3}') + if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \ + "got ${ether_dst}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_broadcast_ether_dst + +exit $? diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config new file mode 100644 index 000000000000..188f79796670 --- /dev/null +++ b/tools/testing/selftests/net/can/config @@ -0,0 +1,3 @@ +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VCAN=m diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index a825e628aee7..ded9b925865e 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -491,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address [%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -500,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -574,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c24417d0047b..d548611e2698 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,6 +26,7 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_CRYPTO_SHA1=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh new file mode 100755 index 000000000000..82f9c867c3e8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv4.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv4 diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh new file mode 100755 index 000000000000..ab1fc7aa3caf --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv6.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv6 diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh new file mode 100755 index 000000000000..a840cf80b32e --- /dev/null +++ b/tools/testing/selftests/net/fcnal-other.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t other diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4fcc38907e48..844a580ae74e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -189,7 +189,7 @@ show_hint() kill_procs() { killall nettest ping ping6 >/dev/null 2>&1 - sleep 1 + slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"' } set_ping_group() @@ -424,6 +424,8 @@ create_ns() ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0 } # create veth pair to connect namespaces and apply addresses. @@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -943,7 +945,7 @@ ipv4_tcp_md5() # basic use case log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -951,7 +953,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -959,7 +961,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -967,7 +969,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -978,7 +980,7 @@ ipv4_tcp_md5() # client in prefix log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -986,7 +988,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -994,7 +996,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -1005,14 +1007,14 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -1020,7 +1022,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1028,21 +1030,21 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -1050,7 +1052,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1058,7 +1060,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() log_start show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" log_start show_hint "Binding both the socket and the key is not required but it works" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" } @@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" @@ -1193,7 +1195,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -1201,7 +1203,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1221,13 +1223,13 @@ ipv4_tcp_novrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1249,7 +1251,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1257,7 +1259,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1266,7 +1268,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1274,7 +1276,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -1283,7 +1285,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -1291,7 +1293,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1323,19 +1325,19 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1352,7 +1354,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1374,14 +1376,14 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1396,7 +1398,7 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to device" run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1406,7 +1408,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -1418,13 +1420,13 @@ ipv4_tcp_vrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1443,7 +1445,7 @@ ipv4_tcp_vrf() do log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -1451,26 +1453,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" } @@ -1509,7 +1511,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1522,7 +1524,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1533,31 +1535,31 @@ ipv4_udp_novrf() do log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()" @@ -1580,7 +1582,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1588,7 +1590,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1597,7 +1599,7 @@ ipv4_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1605,25 +1607,25 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1636,28 +1638,28 @@ ipv4_udp_novrf() log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 2 "Global server, device client, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1667,7 +1669,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1709,19 +1711,19 @@ ipv4_udp_vrf() log_start show_hint "Fails because ingress is in a VRF and global server is disabled" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1733,7 +1735,7 @@ ipv4_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is out of scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local connection" done @@ -1741,26 +1743,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1775,19 +1777,19 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1802,13 +1804,13 @@ ipv4_udp_vrf() # log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "VRF client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "Enslaved device client" @@ -1829,31 +1831,31 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1861,7 +1863,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" done @@ -1870,7 +1872,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -2093,7 +2095,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2107,7 +2109,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2120,7 +2122,7 @@ ipv4_rt() a=${NSA_IP} log_start run_cmd nettest ${varg} -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2134,7 +2136,7 @@ ipv4_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2145,7 +2147,7 @@ ipv4_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2161,7 +2163,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2175,7 +2177,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2189,7 +2191,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2200,7 +2202,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2211,7 +2213,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2629,7 +2631,7 @@ ipv6_tcp_md5() # basic use case log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2637,7 +2639,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2645,7 +2647,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2653,7 +2655,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2664,7 +2666,7 @@ ipv6_tcp_md5() # client in prefix log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2672,7 +2674,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2680,7 +2682,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2691,14 +2693,14 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -2706,7 +2708,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2714,21 +2716,21 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -2736,7 +2738,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2744,7 +2746,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2772,7 +2774,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2793,7 +2795,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Client" done @@ -2802,7 +2804,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -2822,7 +2824,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -2830,7 +2832,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2839,7 +2841,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -2847,7 +2849,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -2856,7 +2858,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -2865,7 +2867,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" done @@ -2898,7 +2900,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -2907,7 +2909,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2916,7 +2918,7 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2924,7 +2926,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -2943,7 +2945,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2964,7 +2966,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2973,7 +2975,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2982,13 +2984,13 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2996,7 +2998,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3016,7 +3018,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails 'Connection refused' since client is not in VRF" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -3029,7 +3031,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" done @@ -3038,7 +3040,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails since VRF device does not allow linklocal addresses" run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 1 "Client, VRF bind" @@ -3046,7 +3048,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -3071,7 +3073,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -3079,7 +3081,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -3087,13 +3089,13 @@ ipv6_tcp_vrf() log_start show_hint "Should fail since unbound client is out of VRF scope" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -3101,7 +3103,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" done @@ -3141,13 +3143,13 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3155,7 +3157,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3165,7 +3167,7 @@ ipv6_udp_novrf() #log_start #show_hint "Should fail since loopback address is out of scope" #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - #sleep 1 + wait_local_port_listen ${NSA} 12345 udp #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -3185,25 +3187,25 @@ ipv6_udp_novrf() do log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" @@ -3225,7 +3227,7 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -3233,7 +3235,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -3242,7 +3244,7 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -6 -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" done @@ -3250,19 +3252,19 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection" @@ -3271,28 +3273,28 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done @@ -3300,7 +3302,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3314,7 +3316,7 @@ ipv6_udp_novrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3338,7 +3340,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -3347,7 +3349,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3356,7 +3358,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3378,7 +3380,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local conn" done @@ -3387,7 +3389,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -3396,25 +3398,25 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3429,7 +3431,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -3438,7 +3440,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3447,7 +3449,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3465,7 +3467,7 @@ ipv6_udp_vrf() # log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} log_test $? 0 "VRF client" @@ -3476,7 +3478,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} log_test $? 0 "Enslaved device client" @@ -3491,13 +3493,13 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3505,13 +3507,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start run_cmd nettest -6 -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3527,25 +3529,25 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3557,7 +3559,7 @@ ipv6_udp_vrf() # link local addresses log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Global server, linklocal IP" @@ -3568,7 +3570,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} log_test $? 0 "Enslaved device client, linklocal IP" @@ -3579,7 +3581,7 @@ ipv6_udp_vrf() log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Enslaved device client, local conn - linklocal IP" @@ -3592,7 +3594,7 @@ ipv6_udp_vrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3771,7 +3773,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3785,7 +3787,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3799,7 +3801,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3814,7 +3816,7 @@ ipv6_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3825,7 +3827,7 @@ ipv6_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3842,7 +3844,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3856,7 +3858,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3869,7 +3871,7 @@ ipv6_rt() a=${NSA_IP6} log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3880,7 +3882,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3891,7 +3893,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3950,7 +3952,7 @@ netfilter_tcp_reset() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -3968,7 +3970,7 @@ netfilter_icmp() do log_start run_cmd nettest ${arg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4007,7 +4009,7 @@ netfilter_tcp6_reset() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -4025,7 +4027,7 @@ netfilter_icmp6() do log_start run_cmd nettest -6 -s ${arg} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4221,12 +4223,12 @@ use_case_snat_on_vrf() run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} log_test $? 0 "IPv4 TCP connection over VRF with SNAT" run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} log_test $? 0 "IPv6 TCP connection over VRF with SNAT" @@ -4272,6 +4274,7 @@ EOF TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" +# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh PAUSE_ON_FAIL=no PAUSE=no @@ -4302,6 +4305,8 @@ elif [ "$TESTS" = "ipv4" ]; then TESTS="$TESTS_IPV4" elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" +elif [ "$TESTS" = "other" ]; then + TESTS="$TESTS_OTHER" fi check_gen_prog "nettest" diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c159230c9b62..0b8a2465dd04 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -40,16 +40,16 @@ do_test_dup() test_dup_bridge() { - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 do_test_dup add "bridge" dev br self do_test_dup del "bridge" dev br self } test_dup_vxlan_self() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -57,9 +57,9 @@ test_dup_vxlan_self() test_dup_vxlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -67,8 +67,8 @@ test_dup_vxlan_master() test_dup_macvlan_self() { - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru do_test_dup add "macvlan self" dev mv self do_test_dup del "macvlan self" dev mv self @@ -76,10 +76,10 @@ test_dup_macvlan_self() test_dup_macvlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru - ip_link_set_master mv br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b39f748c2572..2b0a90581e2f 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -467,8 +467,8 @@ ipv6_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" - run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1" run_cmd "$IP nexthop add id 103 group 63/64 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" @@ -494,6 +494,26 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 71 group 70 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 73 group 72" + run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -547,15 +567,15 @@ ipv4_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 14 via 172.16.1.2" - run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1" run_cmd "$IP nexthop add id 103 group 14/15 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" # Non fdb nexthop group can not contain fdb nexthops run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" - run_cmd "$IP nexthop add id 104 group 14/15" + run_cmd "$IP nexthop add id 104 group 16/17" log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" # fdb nexthop cannot have blackhole @@ -574,6 +594,26 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 19 group 18 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 21 group 20" + run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -582,7 +622,7 @@ ipv4_fdb_grp_fcnal() run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" log_test $? 255 "Fdb mac add with nexthop" - run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + run_cmd "$IP ro add 172.16.0.0/22 nhid 16" log_test $? 2 "Route add with fdb nexthop" run_cmd "$IP ro add 172.16.0.0/22 nhid 103" diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d7bb2e80e88c..e6f482a600da 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS = \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ + bridge_fdb_local_vlan_0.sh \ bridge_mdb.sh \ bridge_mdb_host.sh \ bridge_mdb_max.sh \ diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7b41cff993ad..392a5a91ed37 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ +Cleanups +-------- + +o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider + making use of the defer primitive to schedule automatic cleanups. This + makes it harder to forget to remove a temporary netdevice, kill a running + process or perform other cleanup when the test script is interrupted. + +o When adding a helper that dirties the environment, but schedules all + necessary cleanups through defer, consider prefixing it adf_ for + consistency with lib.sh and ../lib.sh helpers. This serves as an + immediately visible bit of documentation about the helper API. + +o Definitely do the above for any new code in lib.sh, if practical. + Customization ============= diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..522a5b1b046c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" 192.0.2.1/28 +} + +h2_create() +{ + adf_simple_if_init "$h2" 192.0.2.2/28 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + adf_ip_link_set_up br1 + + adf_ip_link_set_master "$swp1" br1 + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_master "$swp2" br1 + adf_ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh new file mode 100755 index 000000000000..694de8ba97e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +-----------------------+ +-----------------------+ +# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h2 | | + $h3 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | +# | | | | | | | | | +# +----|------------------+ +----|------------------+ +----|------------------+ +# | | | +# +----|-------------------------|-------------------------|------------------+ +# | +--|-------------------------|------------------+ | | +# | | + $swp1 + $swp2 | + $swp3 | +# | | | 192.0.2.17/28 | +# | | BR1 (802.1q) | 2001:db8:2::1/64 | +# | | 192.0.2.3/28 | | +# | | 2001:db8:1::3/64 | | +# | +-----------------------------------------------+ SW | +# +---------------------------------------------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. +#shellcheck disable=SC2034 # ... and global variables + +ALL_TESTS=" + test_d_no_sharing + test_d_sharing + test_q_no_sharing + test_q_sharing + test_addr_set +" + +NUM_NETIFS=6 +source lib.sh + +pMAC=00:11:22:33:44:55 +bMAC=00:11:22:33:44:66 +mMAC=00:11:22:33:44:77 +xMAC=00:11:22:33:44:88 + +host_create() +{ + local h=$1; shift + local ipv4=$1; shift + local ipv6=$1; shift + + adf_simple_if_init "$h" "$ipv4" "$ipv6" + adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3 + adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3 +} + +h3_create() +{ + adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64 + adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17 + adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1 + + tc qdisc add dev "$h3" clsact + defer tc qdisc del dev "$h3" clsact + + tc filter add dev "$h3" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h3" ingress proto ip pref 104 + + tc qdisc add dev "$h2" clsact + defer tc qdisc del dev "$h2" clsact + + tc filter add dev "$h2" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h2" ingress proto ip pref 104 +} + +switch_create() +{ + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_up "$swp2" + + adf_ip_addr_add "$swp3" 192.0.2.17/28 + adf_ip_addr_add "$swp3" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp3" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64 + host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64 + h3_create + + switch_create +} + +adf_bridge_configure() +{ + local dev + + adf_ip_addr_add br 192.0.2.3/28 + adf_ip_addr_add br 2001:db8:1::3/64 + + adf_bridge_vlan_add dev br vid 1 pvid untagged self + adf_bridge_vlan_add dev br vid 2 self + adf_bridge_vlan_add dev br vid 3 self + + for dev in "$swp1" "$swp2"; do + adf_ip_link_set_master "$dev" br + adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged + adf_bridge_vlan_add dev "$dev" vid 2 + adf_bridge_vlan_add dev "$dev" vid 3 + done +} + +adf_bridge_create() +{ + local mac + + adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@" + mac=$(mac_get br) + adf_bridge_configure + adf_ip_link_set_addr br "$mac" +} + +check_fdb_local_vlan_0_support() +{ + if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \ + fdb_local_vlan_0 1 &>/dev/null; then + return 0 + fi + + log_test_skip "FDB sharing" \ + "iproute 2 or the kernel do not support fdb_local_vlan_0" +} + +check_mac_presence() +{ + local should_fail=$1; shift + local dev=$1; shift + local vlan=$1; shift + local mac + + mac=$(mac_get "$dev") + + if ((vlan == 0)); then + vlan=null + fi + + bridge -j fdb show dev "$dev" | + jq -e --arg mac "$mac" --argjson vlan "$vlan" \ + '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null + check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists" +} + +do_sharing_test() +{ + local should_fail=$1; shift + local what=$1; shift + local dev + + RET=0 + + for dev in "$swp1" "$swp2" br; do + check_mac_presence 0 "$dev" 0 + check_mac_presence "$should_fail" "$dev" 1 + check_mac_presence "$should_fail" "$dev" 2 + check_mac_presence "$should_fail" "$dev" 3 + done + + log_test "$what" +} + +do_end_to_end_test() +{ + local mac=$1; shift + local what=$1; shift + local probe_dev=${1-$h3}; shift + local expect=${1-10}; shift + + local t0 + local t1 + local dd + + RET=0 + + # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing + # context, that will cause an FDB miss on VLAN 1 and prompt a second + # lookup in VLAN 0. + + t0=$(tc_rule_stats_get "$probe_dev" 104 ingress) + + $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q + sleep 1 + + t1=$(tc_rule_stats_get "$probe_dev" 104 ingress) + dd=$((t1 - t0)) + + ((dd == expect)) + check_err $? "Expected $expect packets on $probe_dev got $dd" + + log_test "$what" +} + +do_tests() +{ + local should_fail=$1; shift + local what=$1; shift + local swp1_mac + local br_mac + + swp1_mac=$(mac_get "$swp1") + br_mac=$(mac_get br) + + do_sharing_test "$should_fail" "$what" + do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC" + do_end_to_end_test "$br_mac" "$what: end to end, br MAC" +} + +bridge_standard() +{ + local vlan_filtering=$1; shift + + if ((vlan_filtering)); then + echo 802.1q + else + echo 802.1d + fi +} + +nonexistent_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + # We expect flooding, so $h2 should get the traffic. + do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2" +} + +misleading_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + defer_scope_push + # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge + # shouldn't pick this up even with fdb_local_vlan_0 enabled, so + # the traffic should be flooded. This all holds on + # vlan_filtering bridge, on non-vlan_filtering one the FDB entry + # is expected to be found as usual, no flooding takes place. + # + # Adding only on VLAN 0 is a bit tricky, because bridge is + # trying to be nice and interprets the request as if the FDB + # should be added on each VLAN. + + bridge fdb add "$mMAC" dev "$swp1" master + bridge fdb del "$mMAC" dev "$swp1" vlan 1 master + bridge fdb del "$mMAC" dev "$swp1" vlan 2 master + bridge fdb del "$mMAC" dev "$swp1" vlan 3 master + + local expect=$((vlan_filtering ? 10 : 0)) + do_end_to_end_test "$mMAC" \ + "$standard: Lookup of non-local MAC on VLAN 0" \ + "$h2" "$expect" + defer_scope_pop +} + +change_mac() +{ + local dev=$1; shift + local mac=$1; shift + local cur_mac + + cur_mac=$(mac_get "$dev") + + log_info "Change $dev MAC $cur_mac -> $mac" + adf_ip_link_set_addr "$dev" "$mac" + defer log_info "Change $dev MAC back" +} + +do_test_no_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + adf_bridge_create vlan_filtering "$vlan_filtering" + setup_wait + + do_tests 0 "$standard, no FDB sharing" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 0 "$standard, no FDB sharing after MAC change" + + in_defer_scope check_fdb_local_vlan_0_support || return + + log_info "Set fdb_local_vlan_0=1" + ip link set dev br type bridge fdb_local_vlan_0 1 + + do_tests 1 "$standard, fdb sharing after toggle" +} + +do_test_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + in_defer_scope check_fdb_local_vlan_0_support || return + + adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1 + setup_wait + + do_tests 1 "$standard, FDB sharing" + + nonexistent_fdb_test "$vlan_filtering" + misleading_fdb_test "$vlan_filtering" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 1 "$standard, FDB sharing after MAC change" + + log_info "Set fdb_local_vlan_0=0" + ip link set dev br type bridge fdb_local_vlan_0 0 + + do_tests 0 "$standard, No FDB sharing after toggle" +} + +test_d_no_sharing() +{ + do_test_no_sharing 0 +} + +test_d_sharing() +{ + do_test_sharing 0 +} + +test_q_no_sharing() +{ + do_test_no_sharing 1 +} + +test_q_sharing() +{ + do_test_sharing 1 +} + +adf_addr_set_bridge_create() +{ + adf_ip_link_add br up type bridge vlan_filtering 0 + adf_ip_link_set_addr br "$(mac_get br)" + adf_bridge_configure +} + +test_addr_set() +{ + adf_addr_set_bridge_create + setup_wait + + do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC" +} + +trap cleanup EXIT + +setup_prepare +tests_run diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 7d531f7091e6..5dbfab0e23e3 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -226,7 +226,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index dda11a4a9450..b4f17a5bbc61 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -321,7 +321,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 49fa94b53a1c..25036e38043c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -95,7 +95,7 @@ ipv6_in_too_big_err() # Send too big packets ip vrf exec $vrf_name \ - $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) test "$((t1 - t0))" -ne 0 @@ -131,7 +131,7 @@ ipv6_in_addr_err() # Disable forwarding temporary while sending the packet sysctl -qw net.ipv6.conf.all.forwarding=0 ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null sysctl -qw net.ipv6.conf.all.forwarding=1 local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) @@ -150,7 +150,7 @@ ipv6_in_discard() # Add a policy to discard ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null ip xfrm policy del dst 2001:1:2::2/128 dir fwd local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index e28b4a079e52..b24acfa52a3a 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -323,7 +323,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 890b3374dacd..a9034f0bb58b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -571,30 +571,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -623,6 +599,12 @@ vrf_cleanup() ip -4 rule del pref 32765 } +adf_vrf_prepare() +{ + vrf_prepare + defer vrf_cleanup +} + __last_tb_id=0 declare -A __TB_IDS @@ -735,6 +717,12 @@ simple_if_fini() vrf_destroy $vrf_name } +adf_simple_if_init() +{ + simple_if_init "$@" + defer simple_if_fini "$@" +} + tunnel_create() { local name=$1; shift @@ -1035,6 +1023,12 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +adf_forwarding_enable() +{ + forwarding_enable + defer forwarding_restore +} + declare -A MTU_ORIG mtu_set() { @@ -1291,8 +1285,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping_test() @@ -1322,8 +1316,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index a20d22d1df36..8d4ae6c952a1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -238,7 +238,7 @@ test_lag_slave() ip neigh flush dev br1 setup_wait_dev $up_dev setup_wait_dev $host_dev - $ARPING -I br1 192.0.2.130 -qfc 1 + $ARPING -I br1 -qfc 1 192.0.2.130 sleep 2 mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 1b902cc579f6..a21c771908b3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -196,7 +196,7 @@ test_span_gre_forbidden_egress() bridge vlan add dev $swp3 vid 555 # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev @@ -290,7 +290,7 @@ test_span_gre_fdb_roaming() bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index 8f9922c695b0..0453210271dc 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -165,8 +165,7 @@ h1_create() { local i; - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 9900 defer mtu_restore $h1 @@ -182,8 +181,7 @@ h2_create() { local i - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 9900 defer mtu_restore $h2 @@ -251,8 +249,7 @@ setup_prepare() put=$swp2 hut=$h2 - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index af166662b78a..f2a3d9254642 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -52,8 +52,7 @@ PKTSZ=1400 h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -65,8 +64,7 @@ h1_create() h2_create() { - simple_if_init $h2 192.0.2.2/28 - defer simple_if_fini $h2 192.0.2.2/28 + adf_simple_if_init $h2 192.0.2.2/28 mtu_set $h2 10000 defer mtu_restore $h2 @@ -74,8 +72,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.3/28 - defer simple_if_fini $h3 192.0.2.3/28 + adf_simple_if_init $h3 192.0.2.3/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -125,8 +122,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index ec309a5086bc..070c17faa9e4 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -59,8 +59,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -149,8 +148,7 @@ setup_prepare() h2_mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 462db0b603e7..6a570d256e07 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -119,16 +119,15 @@ source lib.sh h1_create() { - simple_if_init "$h1" - defer simple_if_fini "$h1" + adf_simple_if_init "$h1" - ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 - ip_link_set_up "$h1.10" - ip_addr_add "$h1.10" 192.0.2.1/28 + adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + adf_ip_link_set_up "$h1.10" + adf_ip_addr_add "$h1.10" 192.0.2.1/28 - ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 - ip_link_set_up "$h1.20" - ip_addr_add "$h1.20" 2001:db8:1::1/64 + adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + adf_ip_link_set_up "$h1.20" + adf_ip_addr_add "$h1.20" 2001:db8:1::1/64 } install_capture() @@ -152,51 +151,51 @@ install_capture() h2_create() { # $h2 - ip_link_set_up "$h2" + adf_ip_link_set_up "$h2" # H2 vrf_create "v$h2" defer vrf_destroy "v$h2" - ip_link_set_up "v$h2" + adf_ip_link_set_up "v$h2" # br2 - ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br2 "v$h2" - ip_link_set_up br2 + adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br2 "v$h2" + adf_ip_link_set_up br2 # $h2 - ip_link_set_master "$h2" br2 + adf_ip_link_set_master "$h2" br2 install_capture "$h2" # v1$h2 - ip_link_set_up "v1$h2" - ip_link_set_master "v1$h2" br2 + adf_ip_link_set_up "v1$h2" + adf_ip_link_set_master "v1$h2" br2 } h3_create() { # $h3 - ip_link_set_up "$h3" + adf_ip_link_set_up "$h3" # H3 vrf_create "v$h3" defer vrf_destroy "v$h3" - ip_link_set_up "v$h3" + adf_ip_link_set_up "v$h3" # br3 - ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br3 "v$h3" - ip_link_set_up br3 + adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br3 "v$h3" + adf_ip_link_set_up br3 # $h3 - ip_link_set_master "$h3" br3 + adf_ip_link_set_master "$h3" br3 install_capture "$h3" # v1$h3 - ip_link_set_up "v1$h3" - ip_link_set_master "v1$h3" br3 + adf_ip_link_set_up "v1$h3" + adf_ip_link_set_master "v1$h3" br3 } switch_create() @@ -205,35 +204,35 @@ switch_create() # br1 swp1_mac=$(mac_get "$swp1") - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_addr br1 "$swp1_mac" - ip_link_set_up br1 + adf_ip_link_set_addr br1 "$swp1_mac" + adf_ip_link_set_up br1 # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. - ip_link_add "X$IPMR" up type dummy + adf_ip_link_add "X$IPMR" up type dummy # IPMR - ip_link_add "$IPMR" up type dummy - ip_addr_add "$IPMR" 192.0.2.100/28 - ip_addr_add "$IPMR" 2001:db8:4::1/64 + adf_ip_link_add "$IPMR" up type dummy + adf_ip_addr_add "$IPMR" 192.0.2.100/28 + adf_ip_addr_add "$IPMR" 2001:db8:4::1/64 # $swp1 - ip_link_set_up "$swp1" - ip_link_set_master "$swp1" br1 - bridge_vlan_add vid 10 dev "$swp1" - bridge_vlan_add vid 20 dev "$swp1" + adf_ip_link_set_up "$swp1" + adf_ip_link_set_master "$swp1" br1 + adf_bridge_vlan_add vid 10 dev "$swp1" + adf_bridge_vlan_add vid 20 dev "$swp1" # $swp2 - ip_link_set_up "$swp2" - ip_addr_add "$swp2" 192.0.2.33/28 - ip_addr_add "$swp2" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp2" + adf_ip_addr_add "$swp2" 192.0.2.33/28 + adf_ip_addr_add "$swp2" 2001:db8:2::1/64 # $swp3 - ip_link_set_up "$swp3" - ip_addr_add "$swp3" 192.0.2.65/28 - ip_addr_add "$swp3" 2001:db8:3::1/64 + adf_ip_link_set_up "$swp3" + adf_ip_addr_add "$swp3" 192.0.2.65/28 + adf_ip_addr_add "$swp3" 2001:db8:3::1/64 } vx_create() @@ -241,11 +240,11 @@ vx_create() local name=$1; shift local vid=$1; shift - ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 16 \ "$@" - ip_link_set_master "$name" br1 - bridge_vlan_add vid "$vid" dev "$name" pvid untagged + adf_ip_link_set_master "$name" br1 + adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged } export -f vx_create @@ -290,39 +289,38 @@ ns_init_common() local ipv6_host=$1; shift # v2$h2 / v2$h3 - ip_link_set_up "$if_in" - ip_addr_add "$if_in" "$ipv4_in" - ip_addr_add "$if_in" "$ipv6_in" + adf_ip_link_set_up "$if_in" + adf_ip_addr_add "$if_in" "$ipv4_in" + adf_ip_addr_add "$if_in" "$ipv6_in" # br1 - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_up br1 + adf_ip_link_set_up br1 # vx10, vx20 vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" # w1 - ip_link_add w1 type veth peer name w2 - ip_link_set_master w1 br1 - ip_link_set_up w1 - bridge_vlan_add vid 10 dev w1 - bridge_vlan_add vid 20 dev w1 + adf_ip_link_add w1 type veth peer name w2 + adf_ip_link_set_master w1 br1 + adf_ip_link_set_up w1 + adf_bridge_vlan_add vid 10 dev w1 + adf_bridge_vlan_add vid 20 dev w1 # w2 - simple_if_init w2 - defer simple_if_fini w2 + adf_simple_if_init w2 # w2.10 - ip_link_add w2.10 master vw2 link w2 type vlan id 10 - ip_link_set_up w2.10 - ip_addr_add w2.10 "$ipv4_host" + adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10 + adf_ip_link_set_up w2.10 + adf_ip_addr_add w2.10 "$ipv4_host" # w2.20 - ip_link_add w2.20 master vw2 link w2 type vlan id 20 - ip_link_set_up w2.20 - ip_addr_add w2.20 "$ipv6_host" + adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20 + adf_ip_link_set_up w2.20 + adf_ip_addr_add w2.20 "$ipv6_host" } export -f ns_init_common @@ -371,14 +369,11 @@ setup_prepare() swp3=${NETIFS[p5]} h3=${NETIFS[p6]} - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare + adf_forwarding_enable - forwarding_enable - defer forwarding_restore - - ip_link_add "v1$h2" type veth peer name "v2$h2" - ip_link_add "v1$h3" type veth peer name "v2$h3" + adf_ip_link_add "v1$h2" type veth peer name "v2$h2" + adf_ip_link_add "v1$h3" type veth peer name "v2$h3" h1_create h2_create @@ -720,7 +715,7 @@ ipv4_mcroute_fdb_oif0_sep() { adf_install_sg_sep - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -731,7 +726,7 @@ ipv4_mcroute_fdb_oif0_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -742,7 +737,7 @@ ipv4_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add \ @@ -754,7 +749,7 @@ ipv6_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx "X$IPMR" - ip_addr_add "X$IPMR" 2001:db8:5::1/64 + adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh index 46c31794b91b..709845123727 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -47,8 +47,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact defer tc qdisc del dev $h1 clsact @@ -60,24 +59,23 @@ h1_create() switch_create() { - ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. - ip_link_set_addr br1 $(mac_get $swp1) - ip_link_set_up br1 + adf_ip_link_set_addr br1 $(mac_get $swp1) + adf_ip_link_set_up br1 - ip_link_set_up $rp1 - ip_addr_add $rp1 192.0.2.17/28 - ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + adf_ip_link_set_up $rp1 + adf_ip_addr_add $rp1 192.0.2.17/28 + adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 - ip_link_set_master $swp1 br1 - ip_link_set_up $swp1 + adf_ip_link_set_master $swp1 br1 + adf_ip_link_set_up $swp1 } vrp2_create() { - simple_if_init $rp2 192.0.2.18/28 - defer simple_if_fini $rp2 192.0.2.18/28 + adf_simple_if_init $rp2 192.0.2.18/28 } setup_prepare() @@ -88,11 +86,8 @@ setup_prepare() rp1=${NETIFS[p3]} rp2=${NETIFS[p4]} - vrf_prepare - defer vrf_cleanup - - forwarding_enable - defer forwarding_restore + adf_vrf_prepare + adf_forwarding_enable h1_create switch_create @@ -200,10 +195,10 @@ vxlan_ping_do() vxlan_device_add() { - ip_link_add vx1 up type vxlan id 1000 \ + adf_ip_link_add vx1 up type vxlan id 1000 \ local 192.0.2.17 dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 100 "$@" - ip_link_set_master vx1 br1 + adf_ip_link_set_master vx1 br1 } vxlan_all_reserved_bits() diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index d5824eadea10..2b1d9f2b3e9e 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static bool ipip; static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) @@ -114,7 +115,9 @@ static void setup_sock_filter(int fd) int ipproto_off, opt_ipproto_off; int next_off; - if (proto == PF_INET) + if (ipip) + next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); + else if (proto == PF_INET) next_off = offsetof(struct iphdr, protocol); else next_off = offsetof(struct ipv6hdr, nexthdr); @@ -244,7 +247,7 @@ static void fill_datalinklayer(void *buf) eth->h_proto = ethhdr_proto; } -static void fill_networklayer(void *buf, int payload_len) +static void fill_networklayer(void *buf, int payload_len, int protocol) { struct ipv6hdr *ip6h = buf; struct iphdr *iph = buf; @@ -254,7 +257,7 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->version = 6; ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->nexthdr = protocol; ip6h->hop_limit = 8; if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); @@ -266,7 +269,7 @@ static void fill_networklayer(void *buf, int payload_len) iph->version = 4; iph->ihl = 5; iph->ttl = 8; - iph->protocol = IPPROTO_TCP; + iph->protocol = protocol; iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ @@ -313,9 +316,19 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, { memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); + + if (ipip) { + fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), + IPPROTO_IPIP); + fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), + payload_len, IPPROTO_TCP); + } else { + fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + } + fill_datalinklayer(buf); } @@ -416,6 +429,13 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) iph->tot_len = htons(ntohs(iph->tot_len) + extlen); iph->check = 0; iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + + if (ipip) { + iph += 1; + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } } else { ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); } @@ -670,7 +690,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should not coalesce */ + case 3: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -777,7 +797,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr) */ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); fill_datalinklayer(buf); iph->frag_off = htons(0x6000); // DF = 1, MF = 1 @@ -1071,7 +1091,7 @@ static void gro_sender(void) * and min ipv6hdr size. Like MAX_HDR_SIZE, * MAX_PAYLOAD is defined with the larger header of the two. */ - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder); @@ -1188,10 +1208,9 @@ static void gro_receiver(void) correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); correct_payload[0] = PAYLOAD_LEN * 2; @@ -1222,7 +1241,7 @@ static void gro_receiver(void) check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; correct_payload[0] = (MAX_PAYLOAD + offset); @@ -1251,6 +1270,7 @@ static void parse_args(int argc, char **argv) { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, @@ -1260,7 +1280,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1270,6 +1290,11 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1305,7 +1330,10 @@ int main(int argc, char **argv) { parse_args(argc, argv); - if (proto == PF_INET) { + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 9e3f186bc2a1..4c5144c6f652 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -4,7 +4,7 @@ readonly SERVER_MAC="aa:00:00:00:00:02" readonly CLIENT_MAC="aa:00:00:00:00:01" readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") +readonly PROTOS=("ipv4" "ipv6" "ipip") dev="" test="all" proto="ipv4" diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..267ef62b5c72 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs@librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = htons(9) /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i", + rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + if (close(s) == -1) + error(KSFT_FAIL, errno, "close"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c7add0dc4c60..feba4ef69a54 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -543,31 +543,31 @@ require_command() fi } -ip_link_add() +adf_ip_link_add() { local name=$1; shift - ip link add name "$name" "$@" - defer ip link del dev "$name" + ip link add name "$name" "$@" && \ + defer ip link del dev "$name" } -ip_link_set_master() +adf_ip_link_set_master() { local member=$1; shift local master=$1; shift - ip link set dev "$member" master "$master" - defer ip link set dev "$member" nomaster + ip link set dev "$member" master "$master" && \ + defer ip link set dev "$member" nomaster } -ip_link_set_addr() +adf_ip_link_set_addr() { local name=$1; shift local addr=$1; shift local old_addr=$(mac_get "$name") - ip link set dev "$name" address "$addr" - defer ip link set dev "$name" address "$old_addr" + ip link set dev "$name" address "$addr" && \ + defer ip link set dev "$name" address "$old_addr" } ip_link_has_flag() @@ -585,44 +585,44 @@ ip_link_is_up() ip_link_has_flag "$1" UP } -ip_link_set_up() +adf_ip_link_set_up() { local name=$1; shift if ! ip_link_is_up "$name"; then - ip link set dev "$name" up - defer ip link set dev "$name" down + ip link set dev "$name" up && \ + defer ip link set dev "$name" down fi } -ip_link_set_down() +adf_ip_link_set_down() { local name=$1; shift if ip_link_is_up "$name"; then - ip link set dev "$name" down - defer ip link set dev "$name" up + ip link set dev "$name" down && \ + defer ip link set dev "$name" up fi } -ip_addr_add() +adf_ip_addr_add() { local name=$1; shift - ip addr add dev "$name" "$@" - defer ip addr del dev "$name" "$@" + ip addr add dev "$name" "$@" && \ + defer ip addr del dev "$name" "$@" } -ip_route_add() +adf_ip_route_add() { - ip route add "$@" - defer ip route del "$@" + ip route add "$@" && \ + defer ip route del "$@" } -bridge_vlan_add() +adf_bridge_vlan_add() { - bridge vlan add "$@" - defer bridge vlan del "$@" + bridge vlan add "$@" && \ + defer bridge vlan del "$@" } wait_local_port_listen() @@ -645,3 +645,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +} diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 02be28dcc089..997b85cc216a 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily, DevlinkFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 8e35ed12ed9e..83b1574f7719 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -72,6 +72,11 @@ def ksft_true(a, comment=""): _fail("Check failed", a, "does not eval to True", comment) +def ksft_not_none(a, comment=""): + if a is None: + _fail("Check failed", a, "is None", comment) + + def ksft_in(a, b, comment=""): if a not in b: _fail("Check failed", a, "not in", b, comment) @@ -92,6 +97,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_gt(a, b, comment=""): + if a <= b: + _fail("Check failed", a, "<=", b, comment) + + def ksft_lt(a, b, comment=""): if a >= b: _fail("Check failed", a, ">=", b, comment) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f395c90fb0f1..cb40ecef9456 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json import os -import random import re import select import socket @@ -21,17 +19,19 @@ def fd_read_timeout(fd, timeout): rlist, _, _ = select.select([fd], [], [], timeout) if rlist: return os.read(fd, 1024) - else: - raise TimeoutError("Timeout waiting for fd read") + raise TimeoutError("Timeout waiting for fd read") class cmd: """ Execute a command on local or remote host. + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + Use bkg() instead to run a command in the background. """ - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, host=None, timeout=5, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -45,6 +45,10 @@ class cmd: if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. @@ -111,12 +115,13 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, exit_wait=False, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail if shell and self.terminate: @@ -127,7 +132,9 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] @@ -135,8 +142,6 @@ global_defer_queue = [] class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -224,11 +229,11 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): return cmd_obj -def rand_port(type=socket.SOCK_STREAM): +def rand_port(stype=socket.SOCK_STREAM): """ Get a random unprivileged port. """ - with socket.socket(socket.AF_INET6, type) as s: + with socket.socket(socket.AF_INET6, stype) as s: s.bind(("", 0)) return s.getsockname()[1] @@ -249,3 +254,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. + """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 2b3a61ea3bfa..32c223e93b2c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -61,3 +61,8 @@ class DevlinkFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), schema='', recv_size=recv_size) + +class PSPFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh index 082f5d38321b..47ab78c4d465 100644 --- a/tools/testing/selftests/net/lib/sh/defer.sh +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Whether to pause and allow debugging when an executed deferred command has a +# non-zero exit code. +: "${DEFER_PAUSE_ON_FAIL:=no}" + # map[(scope_id,track,cleanup_id) -> cleanup_command] # track={d=default | p=priority} declare -A __DEFER__JOBS @@ -38,8 +42,20 @@ __defer__run() local track=$1; shift local defer_ix=$1; shift local defer_key=$(__defer__defer_key $track $defer_ix) + local ret + + eval ${__DEFER__JOBS[$defer_key]} + ret=$? + + if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then + echo "Deferred command (track $track index $defer_ix):" + echo " ${__DEFER__JOBS[$defer_key]}" + echo "... ended with an exit status of $ret" + echo "Hit enter to continue, 'q' to quit" + read a + [[ "$a" == q ]] && exit 1 + fi - ${__DEFER__JOBS[$defer_key]} unset __DEFER__JOBS[$defer_key] } @@ -49,7 +65,7 @@ __defer__schedule() local ndefers=$(__defer__ndefers $track) local ndefers_key=$(__defer__ndefer_key $track) local defer_key=$(__defer__defer_key $track $ndefers) - local defer="$@" + local defer="${@@Q}" __DEFER__JOBS[$defer_key]="$defer" __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 521ba38f2ddd..c368fc045f4b 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -14,6 +14,8 @@ #define MAX_PAYLOAD_LEN 5000 #define MAX_HDR_LEN 64 +extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak; + enum { XDP_MODE = 0, XDP_PORT = 1, @@ -68,30 +70,57 @@ static void record_stats(struct xdp_md *ctx, __u32 stat_type) static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; - struct ethhdr *eth = data; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; if (data + sizeof(*eth) > data_end) return NULL; if (eth->h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = data + sizeof(*eth); + struct iphdr *iph; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); if (iph + 1 > (struct iphdr *)data_end || iph->protocol != IPPROTO_UDP) return NULL; - udph = (void *)eth + sizeof(*iph) + sizeof(*eth); - } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = data + sizeof(*eth); + udph = data + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); if (ipv6h + 1 > (struct ipv6hdr *)data_end || ipv6h->nexthdr != IPPROTO_UDP) return NULL; - udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + udph = data + sizeof(*ipv6h) + sizeof(*eth); } else { return NULL; } @@ -145,17 +174,34 @@ static void swap_machdr(void *data) static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; - struct ethhdr *eth = data; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; if (data + sizeof(*eth) > data_end) return XDP_PASS; if (eth->h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = data + sizeof(*eth); - __be32 tmp_ip = iph->saddr; + struct iphdr *iph; + __be32 tmp_ip; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); if (iph + 1 > (struct iphdr *)data_end || iph->protocol != IPPROTO_UDP) @@ -169,8 +215,10 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; record_stats(ctx, STATS_RX); + eth = data; swap_machdr((void *)eth); + tmp_ip = iph->saddr; iph->saddr = iph->daddr; iph->daddr = tmp_ip; @@ -178,9 +226,19 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_TX; - } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = data + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct in6_addr tmp_ipv6; + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); if (ipv6h + 1 > (struct ipv6hdr *)data_end || ipv6h->nexthdr != IPPROTO_UDP) @@ -194,6 +252,7 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; record_stats(ctx, STATS_RX); + eth = data; swap_machdr((void *)eth); __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); @@ -361,7 +420,6 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; __s32 *adjust_offset, *val; __u32 key, hdr_len; @@ -373,7 +431,8 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) if (!udph) return XDP_PASS; - hdr_len = (void *)udph - data + sizeof(struct udphdr); + hdr_len = (void *)udph - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); if (!adjust_offset) @@ -513,8 +572,6 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph_ptr = NULL; __u32 key, size, hdr_len; __s32 *val; @@ -525,7 +582,8 @@ static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) if (!udph_ptr) return XDP_PASS; - hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + hdr_len = (void *)udph_ptr - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; val = bpf_map_lookup_elem(&map_xdp_setup, &key); diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 7a3cb4c09e45..d847ff1737c3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -28,7 +28,7 @@ flush_pids() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4f07ac9fa207..b148cadb96d0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1093,6 +1093,7 @@ int main_loop_s(int listensock) struct pollfd polls; socklen_t salen; int remotesock; + int err = 0; int fd = 0; again: @@ -1125,7 +1126,7 @@ again: SOCK_TEST_TCPULP(remotesock, 0); memset(&winfo, 0, sizeof(winfo)); - copyfd_io(fd, remotesock, 1, true, &winfo); + err = copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; @@ -1134,10 +1135,10 @@ again: if (cfg_input) close(fd); - if (--cfg_repeat > 0) + if (!err && --cfg_repeat > 0) goto again; - return 0; + return err; } static void init_rng(void) @@ -1247,7 +1248,7 @@ void xdisconnect(int fd) else xerror("bad family"); - strcpy(cmd, "ss -M | grep -q "); + strcpy(cmd, "ss -Mnt | grep -q "); cmdlen = strlen(cmd); if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], sizeof(cmd) - cmdlen)) @@ -1257,7 +1258,7 @@ void xdisconnect(int fd) /* * wait until the pending data is completely flushed and all - * the MPTCP sockets reached the closed status. + * the sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 5e3c56253274..47ecb5b3836e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -134,7 +134,7 @@ ns4="" TEST_GROUP="" # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cin_disconnect" @@ -211,6 +211,11 @@ if $checksum; then done fi +if $capture; then + rndh="${ns1:4}" + mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-" +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -361,7 +366,6 @@ do_transfer() if $capture; then local capuser - local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index f3bcaa48df8f..8e8f6441ad8b 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -502,6 +502,7 @@ static int server(int unixfd) process_one_client(r, unixfd); + close(fd); return 0; } @@ -580,8 +581,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(unixfds[1]); + if (s == 0) { + close(unixfds[0]); + ret = server(unixfds[1]); + close(unixfds[1]); + return ret; + } close(unixfds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 82cae37d9c20..c90d8e8b95cb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,7 @@ # ShellCheck incorrectly believes that most of the code here is unreachable # because it's invoked by variable name, see how the "tests" array is used -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 . "$(dirname "${0}")/mptcp_lib.sh" @@ -74,6 +74,17 @@ unset join_create_err unset join_bind_err unset join_connect_err +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss + # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, @@ -347,6 +358,7 @@ reset_with_add_addr_timeout() tables="${ip6tables}" fi + # set a maximum, to avoid too long timeout with exponential backoff ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ @@ -1399,6 +1411,115 @@ chk_join_tx_nr() print_results "join Tx" ${rc} } +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 @@ -1484,6 +1605,8 @@ chk_join_nr() join_syn_tx="${join_syn_tx:-${syn_nr}}" \ chk_join_tx_nr + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1547,7 +1670,6 @@ chk_add_nr() local tx="" local rx="" local count - local timeout if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 @@ -1556,15 +1678,13 @@ chk_add_nr() rx=" server" fi - timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr rx${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_nr" ] && + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then fail_test "got $count ADD_ADDR[s] expected $add_nr" else print_ok @@ -1652,18 +1772,15 @@ chk_add_tx_nr() { local add_tx_nr=$1 local echo_tx_nr=$2 - local timeout local count - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && + { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else print_ok @@ -2151,7 +2268,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 fi @@ -2163,7 +2281,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 join_syn_tx=3 \ chk_join_nr 1 1 1 chk_add_nr 3 3 @@ -2201,6 +2320,74 @@ signal_address_tests() fi } +laminar_endp_tests() +{ + # no laminar endpoints: routing rules are used + if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # laminar endpoints: this endpoint is used + if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # laminar endpoints: these endpoints are used + if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + fi + + # laminar endpoints: only one endpoint is used + if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 2 2 + fi + + # laminar endpoints: subflow and laminar flags + if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi +} + link_failure_tests() { # accept and use add_addr with additional subflows and link loss @@ -3187,6 +3374,17 @@ deny_join_id0_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi + + # default limits, server deny join id 0 + signal + if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi } fullmesh_tests() @@ -3337,6 +3535,7 @@ fail_tests() join_csum_ns1=+1 join_csum_ns2=+0 \ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3978,6 +4177,7 @@ all_tests_sorted=( f@subflows_tests e@subflows_error_tests s@signal_address_tests + L@laminar_endp_tests l@link_failure_tests t@add_addr_timeout_tests r@remove_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 09cd24b2ae46..d62e653d48b0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -384,7 +384,7 @@ mptcp_lib_make_file() { mptcp_lib_print_file_err() { ls -l "${1}" 1>&2 echo "Trailing bytes are: " - tail -c 27 "${1}" + tail -c 32 "${1}" | od -x | head -n2 } # $1: input file ; $2: output file ; $3: what kind of file diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index e934dd26a59d..286164f7246e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -667,22 +667,26 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) - xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64, + s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1)); /* be nice when running on top of older kernel */ if (s.pkt_stats_avail) { if (s.last_sample.mptcpi_bytes_sent != ret2) - xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_sent, ret2, s.last_sample.mptcpi_bytes_sent - ret2); if (s.last_sample.mptcpi_bytes_received != ret) - xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_received, ret, s.last_sample.mptcpi_bytes_received - ret); if (s.last_sample.mptcpi_bytes_acked != ret) - xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, - s.last_sample.mptcpi_bytes_acked, ret2, - s.last_sample.mptcpi_bytes_acked - ret2); + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, + s.last_sample.mptcpi_bytes_acked, ret, + s.last_sample.mptcpi_bytes_acked - ret); } close(fd); @@ -722,6 +726,7 @@ static int server(int pipefd) process_one_client(r, pipefd); + close(fd); return 0; } @@ -847,8 +852,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(pipefds[1]); + if (s == 0) { + close(pipefds[0]); + ret = server(pipefds[1]); + close(pipefds[1]); + return ret; + } close(pipefds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 418a903c3a4d..f01989be6e9b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -95,7 +95,7 @@ init() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index ac7ec6f94023..ec6a87588191 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -32,7 +32,7 @@ ns1="" err=$(mktemp) # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "${err}" @@ -70,8 +70,9 @@ format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } +# This function is invoked indirectly +#shellcheck disable=SC2317,SC2329 get_endpoint() { - # shellcheck disable=SC2317 # invoked indirectly mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" } diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 994a556f46c1..65b374232ff5 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -2,6 +2,7 @@ #include <errno.h> #include <error.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -113,6 +114,8 @@ static int capture_events(int fd, int event_group) error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { + bool server_side = false; + FD_ZERO(&rfds); FD_SET(fd, &rfds); res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + @@ -187,11 +190,22 @@ static int capture_events(int fd, int event_group) else if (attrs->rta_type == MPTCP_ATTR_ERROR) fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) - fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + server_side = !!*(__u8 *)RTA_DATA(attrs); + else if (attrs->rta_type == MPTCP_ATTR_FLAGS) { + __u16 flags = *(__u16 *)RTA_DATA(attrs); + + /* only print when present, easier */ + if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0) + fprintf(stderr, ",deny_join_id0:1"); + if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE) + server_side = true; + } attrs = RTA_NEXT(attrs, msg_len); } } + if (server_side) + fprintf(stderr, ",server_side:1"); fprintf(stderr, "\n"); } while (1); @@ -816,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; else if (!strcmp(tok, "signal")) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "laminar")) + flags |= MPTCP_PM_ADDR_FLAG_LAMINAR; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) @@ -1004,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { + printf("laminar"); + flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR; + if (flags) + printf(","); + } + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { printf("backup"); flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2329c2f8519b..1903e8e84a31 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -35,7 +35,7 @@ usage() { } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cout" "$sout" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 333064b0b5ac..87323942cb8a 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -94,7 +94,7 @@ test_fail() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { print_title "Cleanup" @@ -201,6 +201,9 @@ make_connection() is_v6="v4" fi + # set this on the client side only: will not affect the rest + ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0 + :>"$client_evts" :>"$server_evts" @@ -223,23 +226,28 @@ make_connection() local client_token local client_port local client_serverside + local client_nojoin local server_token local server_serverside + local server_nojoin client_token=$(mptcp_lib_evts_get_info token "$client_evts") client_port=$(mptcp_lib_evts_get_info sport "$client_evts") client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts") server_token=$(mptcp_lib_evts_get_info token "$server_evts") server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") + server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" - if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && - [ "$server_serverside" = 1 ] + if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] && + [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] && + [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ] then test_pass print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else - test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" + test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})" mptcp_lib_result_print_all_tap exit ${KSFT_FAIL} fi diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 79d5b33966ba..305e46b819cb 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m +CONFIG_CRYPTO_SHA1=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh index 606a43a60f73..7fc6c5dbd551 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -99,7 +99,7 @@ run_one_clash_test() local entries local cre - if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then echo "INFO: did not receive expected number of replies for $daddr:$dport" ip netns exec "$ctns" conntrack -S # don't fail: check if clash resolution triggered after all. diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 788cd56ea4a0..615fe3c6f405 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -187,7 +187,7 @@ ct_udpclash() [ -x udpclash ] || return while [ $now -lt $end ]; do - ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 now=$(date +%s) done @@ -277,6 +277,7 @@ check_taint() insert_flood() { local n="$1" + local timeout="$2" local r=0 r=$((RANDOM%$insert_count)) @@ -302,7 +303,7 @@ test_floodresize_all() read tainted_then < /proc/sys/kernel/tainted for n in "$nsclient1" "$nsclient2";do - insert_flood "$n" & + insert_flood "$n" "$timeout" & done # resize table constantly while flood/insert/dump/flushs diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 20e76b395c85..ad97c6227f35 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -408,6 +408,18 @@ perf_duration 0 " +TYPE_doublecreate=" +display cannot create same element twice +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1900,6 +1912,48 @@ test_bug_avx2_mismatch() fi } +test_bug_doublecreate() +{ + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" + local ret=1 + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + # expected to work: 'add' on existing should be no-op. + add "{ $elements }" || return 1 + + # 'create' should return an error. + if nft create element inet filter test "{ $elements }" 2>/dev/null; then + err "Could create an existing element" + return 1 + fi +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -eq 0 ]; then + err "Could create element twice in one transaction" + err "$(nft -a list ruleset)" + return 1 + fi + +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -ne 0 ]; then + err "Could not flush and re-create element in one transaction" + return 1 + fi + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a4ee5496f2a1..45832df98295 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -72,6 +73,7 @@ lmtu=1500 rmtu=2000 filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) usage(){ echo "nft_flowtable.sh [OPTIONS]" @@ -89,7 +91,10 @@ do o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; - s) filesize=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then fi nsin=$(mktemp) +nsin_small=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -265,6 +271,7 @@ check_counters() check_dscp() { local what=$1 + local pmtud="$2" local ok=1 local counter @@ -277,37 +284,39 @@ check_dscp() local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} + local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected" + case "$what" in "dscp_none") if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then - echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; *) - echo "FAIL: Unknown DSCP check" 1>&2 + echo "$failmsg: Unknown DSCP check" 1>&2 ret=1 ok=0 esac @@ -319,9 +328,9 @@ check_dscp() check_transfer() { - in=$1 - out=$2 - what=$3 + local in=$1 + local out=$2 + local what=$3 if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 @@ -342,25 +351,39 @@ test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 - local dstip=$3 - local dstport=$4 + local pmtu=$3 + local dstip=$4 + local dstport=$5 local lret=0 + local socatc + local socatl + local infile="$nsin" + + if [ $pmtu -eq 0 ]; then + infile="$nsin_small" + fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + socatc=$? wait $lpid + socatl=$? - if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then + if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then + rc=1 + fi + + if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then lret=1 ret=1 fi - if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then lret=1 ret=1 fi @@ -370,14 +393,16 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + local pmtu="$3" + + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 return $? } test_tcp_forwarding_set_dscp() { - check_dscp "dscp_none" + local pmtu="$3" ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { @@ -388,8 +413,8 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_ingress" + test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345 + check_dscp "dscp_ingress" "$pmtu" ip netns exec "$nsr1" nft delete table netdev dscpmangle else @@ -405,10 +430,10 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_egress" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + check_dscp "dscp_egress" "$pmtu" - ip netns exec "$nsr1" nft flush table netdev dscpmangle + ip netns exec "$nsr1" nft delete table netdev dscpmangle else echo "SKIP: Could not load netdev:egress for veth1" fi @@ -416,48 +441,53 @@ fi # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 # counters should have seen packets (before and after ft offload kicks in). ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - check_dscp "dscp_fwd" + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + check_dscp "dscp_fwd" "$pmtu" } test_tcp_forwarding_nat() { + local nsa="$1" + local nsb="$2" + local pmtu="$3" + local what="$4" local lret - local pmtu - test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 - lret=$? + [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)" - pmtu=$3 - what=$4 + test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345 + lret=$? if [ "$lret" -eq 0 ] ; then if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with masquerade $what" else echo "PASS: flow offload for ns1/ns2 with masquerade $what" fi - test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666 lret=$? if [ "$pmtu" -eq 1 ] ;then - check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + check_counters "flow offload for ns1/ns2 with dnat $what" elif [ "$lret" -eq 0 ] ; then echo "PASS: flow offload for ns1/ns2 with dnat $what" fi + else + echo "FAIL: flow offload for ns1/ns2 with dnat $what" fi return $lret } make_file "$nsin" "$filesize" +make_file "$nsin_small" "$filesize_small" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. # Due to MTU mismatch in both directions, all packets (except small packets like pure # acks) have to be handled by normal forwarding path. Therefore, packet counters # are not checked. -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 0; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -489,8 +519,9 @@ table ip nat { } EOF +check_dscp "dscp_none" "0" if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then - echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -513,6 +544,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null +ip netns exec "$ns2" nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 + exit 0 +fi + +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 @@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -668,7 +707,7 @@ if [ "$1" = "" ]; then fi echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" - $0 -o "$o" -l "$l" -r "$r" -s "$filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index a954754b99b3..b3ec2d0a3f56 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then - echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi @@ -859,7 +859,7 @@ EOF # from router:service bypass connection tracking. test_port_shadow_notrack "$family" - # test nat based mitigation: fowarded packets coming from service port + # test nat based mitigation: forwarded packets coming from service port # are masqueraded with random highport. test_port_shadow_pat "$family" diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c index 85c7b906ad08..79de163d61ab 100644 --- a/tools/testing/selftests/net/netfilter/udpclash.c +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -29,7 +29,7 @@ struct thread_args { int sockfd; }; -static int wait = 1; +static volatile int wait = 1; static void *thread_main(void *varg) { diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 07423f256f96..7618ebe528a4 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -31,9 +31,18 @@ struct ext_ack { const char *str; }; -/* 0: no done, 1: done found, 2: extack found, -1: error */ -static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +enum get_ea_ret { + ERROR = -1, + NO_CTRL = 0, + FOUND_DONE, + FOUND_ERR, + FOUND_EXTACK, +}; + +static enum get_ea_ret +nl_get_extack(char *buf, size_t n, struct ext_ack *ea) { + enum get_ea_ret ret = NO_CTRL; const struct nlmsghdr *nlh; const struct nlattr *attr; ssize_t rem; @@ -41,15 +50,19 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { nlh = (struct nlmsghdr *)&buf[n - rem]; if (!NLMSG_OK(nlh, rem)) - return -1; + return ERROR; - if (nlh->nlmsg_type != NLMSG_DONE) + if (nlh->nlmsg_type == NLMSG_ERROR) + ret = FOUND_ERR; + else if (nlh->nlmsg_type == NLMSG_DONE) + ret = FOUND_DONE; + else continue; ea->err = -*(int *)NLMSG_DATA(nlh); if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) - return 1; + return ret; ynl_attr_for_each(attr, nlh, sizeof(int)) { switch (ynl_attr_type(attr)) { @@ -68,10 +81,10 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) } } - return 2; + return FOUND_EXTACK; } - return 0; + return ret; } static const struct { @@ -99,9 +112,9 @@ static const struct { TEST(dump_extack) { int netlink_sock; + int i, cnt, ret; char buf[8192]; int one = 1; - int i, cnt; ssize_t n; netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); @@ -118,7 +131,7 @@ TEST(dump_extack) ASSERT_EQ(n, 0); /* Dump so many times we fill up the buffer */ - cnt = 64; + cnt = 80; for (i = 0; i < cnt; i++) { n = send(netlink_sock, &dump_neigh_bad, sizeof(dump_neigh_bad), 0); @@ -140,10 +153,20 @@ TEST(dump_extack) } ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); - EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + ret = nl_get_extack(buf, n, &ea); + /* Once we fill the buffer we'll see one ENOBUFS followed + * by a number of EBUSYs. Then the last recv() will finally + * trigger and complete the dump. + */ + if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY)) + continue; + EXPECT_EQ(ret, FOUND_EXTACK); + EXPECT_EQ(ea.err, EINVAL); EXPECT_EQ(ea.attr_offs, sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); } + /* Make sure last message was a full DONE+extack */ + EXPECT_EQ(ret, FOUND_EXTACK); } static const struct { diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 3c8d3455d8e7..b327d3061ed5 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -25,6 +25,7 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces + tunnel_metadata ovs: test extraction of tunnel metadata drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -113,13 +114,13 @@ ovs_add_dp () { } ovs_add_if () { - info "Adding IF to DP: br:$2 if:$3" - if [ "$4" != "-u" ]; then - ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \ - || return 1 + info "Adding IF to DP: br:$3 if:$4 ($2)" + if [ "$5" != "-u" ]; then + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \ + -t "$2" "$3" "$4" || return 1 else python3 $ovs_base/ovs-dpctl.py add-if \ - -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err & + -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err & pid=$! on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null" fi @@ -166,9 +167,9 @@ ovs_add_netns_and_veths () { fi if [ "$7" != "-u" ]; then - ovs_add_if "$1" "$2" "$4" || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" || return 1 else - ovs_add_if "$1" "$2" "$4" -u || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1 fi if [ $TRACING -eq 1 ]; then @@ -756,6 +757,79 @@ test_upcall_interfaces() { return 0 } +ovs_add_kernel_tunnel() { + local sbxname=$1; shift + local ns=$1; shift + local tnl_type=$1; shift + local name=$1; shift + local addr=$1; shift + + info "setting up kernel ${tnl_type} tunnel ${name}" + ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1 + on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1" + ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1 + ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1 +} + +test_tunnel_metadata() { + which arping >/dev/null 2>&1 || return $ksft_skip + + sbxname="test_tunnel_metadata" + sbx_add "${sbxname}" || return 1 + + info "setting up new DP" + ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1 + + ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \ + 172.31.110.1/24 || return 1 + + info "removing veth interface from openvswitch and setting IP" + ovs_del_if "${sbxname}" tdp0 left0 || return 1 + ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1 + ovs_sbx "${sbxname}" ip link set left0 up || return 1 + + info "setting up tunnel port in openvswitch" + ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1 + on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0" + ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1 + ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1 + + configs=$(echo ' + 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\) + 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\) + 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\) + 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d') + + while read -r i addr id ttl df csum flags; do + ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \ + remote 172.31.110.2 id ${id} dstport 4789 \ + ttl ${ttl} df ${df} ${csum} || return 1 + done <<< "${configs}" + + ovs_wait grep -q 'listening on upcall packet handler' \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + + info "sending arping" + for i in 1 2 3 4; do + ovs_sbx "${sbxname}" ip netns exec tns \ + arping -I vxlan${i} 172.31.22${i}.2 -c 1 \ + >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr + done + + info "checking that received decapsulated packets carry correct metadata" + while read -r i addr id ttl df csum flags; do + arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha=" + addrs="src=172.31.110.1,dst=172.31.110.2" + ports="tp_src=[0-9]*,tp_dst=4789" + tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)" + + ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + done <<< "${configs}" + + return 0 +} + run_test() { ( tname="$1" diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 9201f2905f2c..688a5fa6fdac 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -32,9 +32,10 @@ #include <sys/socket.h> +#include "../../kselftest.h" + /* defines to make checkpatch happy */ #define strscpy strncpy -#define __always_unused __attribute__((__unused__)) /* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we * have to explicitly do it to prevent the kernel from failing upon diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh index 1095a7b22f44..37edd3dc3b07 100755 --- a/tools/testing/selftests/net/packetdrill/defaults.sh +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 -sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen=0x3 +# Use TFO_COOKIE in ksft_runner.sh for this key. sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 sysctl -q net.ipv4.tcp_syncookies=1 diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index a7e790af38ff..b34e5cf0112e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,26 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D TFO_COOKIE=c1d1e9742a47a9bc + -D TFO_COOKIE_ZERO=82af1a8f9a205c34 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" @@ -38,12 +43,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then failfunc=ktap_test_xfail fi +ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2) +if [[ -z $ip_versions ]]; then + ip_versions="ipv4 ipv6" +elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then + ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions" + exit "$KSFT_FAIL" +fi + ktap_print_header -ktap_set_plan 2 +ktap_set_plan $(echo $ip_versions | wc -w) -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv4" || $failfunc "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv6" || $failfunc "ipv6" +for ip_version in $ip_versions; do + unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass $ip_version || $failfunc $ip_version +done ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt new file mode 100644 index 000000000000..32aff9bc4052 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving +// SYN with data but without Fast Open cookie option. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with +// the data accepted. + +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 read(4, ..., 1024) = 1000 + +// Data After SYN will be accepted too. + +0 < . 1001:2001(1000) ack 1 win 5840 + +0 > . 1:1(0) ack 2001 + +// Should change the implementation later to set the SYN flag as well. + +0 read(4, ..., 1024) = 1000 + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt new file mode 100644 index 000000000000..649997a58099 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN) + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt new file mode 100644 index 000000000000..4a00e0d994f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Server w/o TCP_FASTOPEN socket option + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE> + +// Data is ignored since TCP_FASTOPEN is not set on the listener + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// The above should block until ack comes in below. + +0 < . 1:31(30) ack 1 win 5840 + +0 accept(3, ..., ...) = 4 + + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 read(4, ..., 512) = 30 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt new file mode 100644 index 000000000000..345ed26ff7f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test that TFO-enabled server would not respond SYN-ACK with any TFO option +// when receiving a pure SYN-data. It should respond a pure SYN-ack. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN with ECN disabled because this has happened in reality + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN w/ ECN enabled + +0 `sysctl -q net.ipv4.tcp_ecn=2` + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt new file mode 100644 index 000000000000..98e6f84497cd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO server with SYN that has TFO cookie and data. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt new file mode 100644 index 000000000000..95b1047ffdd5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test zero-payload packet w/ valid TFO cookie - a TFO socket will +// still be created and accepted but read() will not return until a +// later pkt with 10 byte. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +// A TFO socket is created and is writable. + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 1 + +0...0.300 read(4, ..., 512) = 10 + +.3 < P. 1:11(10) ack 1 win 5840 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt new file mode 100644 index 000000000000..f75efd51ed0c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A reproducer case for a TFO SYNACK RTO undo bug in: +// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") +// This sequence that tickles this bug is: +// - Fast Open server receives TFO SYN with data, sends SYNACK +// - (client receives SYNACK and sends ACK, but ACK is lost) +// - server app sends some data packets +// - (N of the first data packets are lost) +// - server receives client ACK that has a TS ECR matching first SYNACK, +// and also SACKs suggesting the first N data packets were lost +// - server performs undo of SYNACK RTO, then immediately enters recovery +// - buggy behavior in 794200d66273 then performed an undo that caused +// the connection to be in a bad state, in CA_Open with retrans_out != 0 + +// Check that outbound TS Val ticks are as we would expect with 1000 usec per +// timestamp tick: +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +// Application writes more data + +.010 write(4, ..., 10000) = 10000 + +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001> + +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt new file mode 100644 index 000000000000..c3cb0e8bdcf8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the Experimental Option +// +// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP +// w/ a valid cookie, and the cookie must be the same one +// with one generated by IANA FO + +`./defaults.sh` + +// Request a TFO cookie by Experimental Option +// This must generate the same TFO_COOKIE + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE> + + +0 close(3) = 0 + +// Test if FOEXP with a valid cookie creates a TFO socket + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt new file mode 100644 index 000000000000..dc09f8d9a381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a FIN pkt with the ACK bit to a TFO socket. +// The socket will go to TCP_CLOSE_WAIT state and data can be +// read until the socket is closed, at which time a FIN will be sent. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// FIN is acked and the socket goes to TCP_CLOSE_WAIT state +// in tcp_fin() called from tcp_data_queue(). + +0 < F. 11:11(0) ack 1 win 32792 + +0 > . 1:1(0) ack 12 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 close(4) = 0 + +0 > F. 1:1(0) ack 12 + * > F. 1:1(0) ack 12 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt new file mode 100644 index 000000000000..d5543672e2bd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send an ICMP host_unreachable pkt to a pending SYN_RECV req. +// +// If it's a TFO req, the ICMP error will cause it to switch +// to TCP_CLOSE state but remains in the acceptor queue. + +--ip_version=ipv4 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// Out-of-window icmp is ignored but accounted. + +0 `nstat > /dev/null` + +0 < icmp unreachable [5000:6000(1000)] + +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null` + +// Valid ICMP unreach. + +0 < icmp unreachable host_unreachable [0:10(10)] + +// Unlike the non-TFO case, the req is still there to be accepted. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state +// to TCP_CLOSE + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// The 1st read will succeed and return the data in SYN + +0 read(4, ..., 512) = 10 + +// The 2nd read will fail. + +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host) + +// But is no longer writable because it's in TCP_CLOSE state. + +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe) + +// inbound pkt will trigger RST because the socket has been moved +// off the TCP hash tables. + +0 < . 1:1(0) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt new file mode 100644 index 000000000000..040d5547ed80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it has been accepted. +// +// First read() will return all the data and this is consistent +// with the non-TFO case. Second read will return -1 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// 1st read will return the data from SYN. +// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV. + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 32792 +// See Step 2 in tcp_validate_incoming(). + +// found_ok_skb in tcp_recvmsg_locked() + +0 read(4, ..., 512) = 10 + +// !copied && sk->sk_err -> sock_error(sk) + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt new file mode 100644 index 000000000000..7f9de6c66cbd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket before it is accepted. +// +// The socket won't go away and after it's accepted the data +// in the SYN pkt can still be read. But that's about all that +// the acceptor can do with the socket. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// 1st read will return the data from SYN. + +0 < R. 11:11(0) win 257 + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 257 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt new file mode 100644 index 000000000000..548a87701b5d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it is accepted. +// +// The socket will change to TCP_CLOSE state with pending data so +// write() will fail. Pending data can be still be read and close() +// won't trigger RST if data is not read +// +// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") +// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/ + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt new file mode 100644 index 000000000000..20090bf77655 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a fully established socket with pending data before +// it is accepted. +// +// The socket with pending data won't go away and can still be accepted +// with data read. But it will be in TCP_CLOSE state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Invalid cookie, so accept() fails. + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// Complete 3WHS and send data and RST + +0 < . 1:1(0) ack 1 win 32792 + +0 < . 1:11(10) ack 1 win 32792 + +0 < R. 11:11(0) win 32792 + +// A valid reset won't make the fully-established socket go away. +// It's just that the acceptor will get a dead, unusable socket +// in TCP_CLOSE state. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt new file mode 100644 index 000000000000..9f52d7de3436 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the server cookie is generated by aes64 encoding of remote and local +// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian). +// This would produce a cookie of TFO_COOKIE like many other +// tests (which the same key but set via sysctl). + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Request a valid cookie TFO_COOKIE + +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop> + +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2> + +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test setting the key in the listen state, and produces an identical cookie + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000> + +0 read(4, ..., 8192) = 1000 + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12> + +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101> + +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test invalid key length (must be 16 bytes) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument) + +// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0) + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt new file mode 100644 index 000000000000..e82e06da44c9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a listener socket with pending TFO child. +// This will trigger RST pkt to go out. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RST pkt is generated for each not-yet-accepted TFO child. +// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect() +// -> tcp_need_reset() is true for SYN_RECV + +0 close(3) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt new file mode 100644 index 000000000000..2a148bb14cbf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK> + +// sk->sk_state is TCP_SYN_RECV + +0 accept(3, ..., ...) = 4 + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_disconnect() sets sk->sk_state to TCP_CLOSE + +0 connect(4, AF_UNSPEC, ...) = 0 + +0 > R. 1:1(0) ack 11 win 65535 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// connect() sets sk->sk_state to TCP_SYN_SENT + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress) + +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }% + +// tp->fastopen_rsk must be NULL + +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt new file mode 100644 index 000000000000..09fb63f78a0e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a TFO socket with unread data. +// This will trigger a RST pkt. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// data_was_unread == true in __tcp_close() + +0 close(4) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 88e914c4eef9..a3323c21f001 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1089,10 +1089,11 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - cleanup_del_ovs_internal - cleanup_del_ovs_vswitchd + [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C + [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1 + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd + rm -f "$tmpoutfile" } diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 6e4fef560873..067265b0a554 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -22,10 +22,6 @@ #define PORT_BASE 8000 -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 221270cee3ea..2938045c5cf9 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include <stdio.h> #include <stdlib.h> #include <sys/types.h> @@ -33,7 +34,6 @@ #include <ctype.h> #include <fcntl.h> #include <unistd.h> -#include <bits/wordsize.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <arpa/inet.h> @@ -785,7 +785,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh new file mode 100755 index 000000000000..2db01ece0cc1 --- /dev/null +++ b/tools/testing/selftests/net/route_hint.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test ensures directed broadcast routes use dst hint mechanism + +source lib.sh + +CLIENT_IP4="192.168.0.1" +SERVER_IP4="192.168.0.2" +BROADCAST_ADDRESS="192.168.0.255" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1 + + ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off + ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout" + ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs" + ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +directed_bcast_hint_test() +{ + local rc=0 + + echo "Testing for directed broadcast route hint" + + orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \ + -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q + sleep 1 + new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + + res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc) + + if [ "${res}" -lt 100 ]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected in_brd to be under 100, got ${res}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v bc)" ]; then + echo "SKIP: Could not run test without bc tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup + +directed_bcast_hint_test +exit $? diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index d6c00efeb664..dbf77513f617 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -313,6 +313,8 @@ kci_test_addrlft() slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -323,6 +325,11 @@ kci_test_addrlft() kci_test_promote_secondaries() { + run_cmd ifconfig "$devdummy" + if [ $ret -ne 0 ]; then + end_test "SKIP: ifconfig not installed" + return $ksft_skip + fi promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries) sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1 @@ -519,7 +526,7 @@ kci_test_encap_fou() run_cmd_fail ip -netns "$testns" fou del port 9999 run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - end_test "FAIL: fou"s + end_test "FAIL: fou" return 1 fi @@ -1201,6 +1208,12 @@ do_test_address_proto() local ret=0 local err + run_cmd_grep 'proto' ip address help + if [ $? -ne 0 ];then + end_test "SKIP: addr proto ${what}: iproute2 too old" + return $ksft_skip + fi + ip address add dev "$devdummy" "$addr3" check_err $? proto=$(address_get_proto "$addr3") diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..be1080003c61 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c new file mode 100644 index 000000000000..4c39d599dfce --- /dev/null +++ b/tools/testing/selftests/net/tcp_port_share.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2025 Cloudflare, Inc. + +/* Tests for TCP port sharing (bind bucket reuse). */ + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> + +#include "../kselftest_harness.h" + +#define DST_PORT 30000 +#define SRC_PORT 40000 + +struct sockaddr_inet { + union { + struct sockaddr_storage ss; + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; + char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; +}; + +const int one = 1; + +static int disconnect(int fd) +{ + return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr)); +} + +static int getsockname_port(int fd) +{ + struct sockaddr_inet addr = {}; + int err; + + addr.len = sizeof(addr); + err = getsockname(fd, &addr.sa, &addr.len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static void make_inet_addr(int af, const char *ip, __u16 port, + struct sockaddr_inet *addr) +{ + const char *fmt = ""; + + memset(addr, 0, sizeof(*addr)); + + switch (af) { + case AF_INET: + addr->len = sizeof(addr->v4); + addr->v4.sin_family = af; + addr->v4.sin_port = htons(port); + inet_pton(af, ip, &addr->v4.sin_addr); + fmt = "%s:%hu"; + break; + case AF_INET6: + addr->len = sizeof(addr->v6); + addr->v6.sin6_family = af; + addr->v6.sin6_port = htons(port); + inet_pton(af, ip, &addr->v6.sin6_addr); + fmt = "[%s]:%hu"; + break; + } + + snprintf(addr->str, sizeof(addr->str), fmt, ip, port); +} + +FIXTURE(tcp_port_share) {}; + +FIXTURE_VARIANT(tcp_port_share) { + int domain; + /* IP to listen on and connect to */ + const char *dst_ip; + /* Primary IP to connect from */ + const char *src1_ip; + /* Secondary IP to connect from */ + const char *src2_ip; + /* IP to bind to in order to block the source port */ + const char *bind_ip; +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) { + .domain = AF_INET, + .dst_ip = "127.0.0.1", + .src1_ip = "127.1.1.1", + .src2_ip = "127.2.2.2", + .bind_ip = "127.3.3.3", +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) { + .domain = AF_INET6, + .dst_ip = "::1", + .src1_ip = "2001:db8::1", + .src2_ip = "2001:db8::2", + .bind_ip = "2001:db8::3", +}; + +FIXTURE_SETUP(tcp_port_share) +{ + int sc; + + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set dev lo up"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0); + + sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY); + ASSERT_GE(sc, 0); + ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0); + ASSERT_EQ(close(sc), 0); +} + +FIXTURE_TEARDOWN(tcp_port_share) {} + +/* Verify that an ephemeral port becomes available again after the socket + * bound to it and blocking it from reuse is closed. + */ +TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */ + ASSERT_EQ(close(pb), 0); + + /* Connect again from <src2_ip>:<SRC_PORT> */ + EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + EXPECT_EQ(getsockname_port(c2), SRC_PORT); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +/* Verify that a socket auto-bound during connect() blocks port reuse after + * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind(). + */ +TEST_F(tcp_port_share, port_block_after_disconnect) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */ + ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m"); + ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Trigger port-addr bucket state update with another bind() and close() */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + ASSERT_EQ(close(pb), 0); + + /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket: %m"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" } diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 062f957950af..8b414d0edada 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -21,7 +21,7 @@ test_set_remote() { RET=0 - ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent check_remotes "fdb append" @@ -74,12 +74,12 @@ test_change_mc_remote() { check_command netstat || return - ip_link_add v1 up type veth peer name v2 - ip_link_set_up v2 + adf_ip_link_add v1 up type veth peer name v2 + adf_ip_link_set_up v2 RET=0 - ip_link_add vx up type vxlan dstport 4789 \ + adf_ip_link_add vx up type vxlan dstport 4789 \ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 check_membership "group=224.1.1.1 fail=0" \ diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh new file mode 100755 index 000000000000..20f3369f776b --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nh.sh @@ -0,0 +1,223 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + basic_tx_ipv4 + basic_tx_ipv6 + learning + proxy_ipv4 + proxy_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? "FDB nexthop group stats did not increase" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null + check_err $? "tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh index 80bf11fdc046..a4550511830a 100755 --- a/tools/testing/selftests/net/tfo_passive.sh +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -95,7 +95,7 @@ wait res=$(cat $out_file) rm $out_file -if [ $res -eq 0 ]; then +if [ "$res" = "0" ]; then echo "got invalid NAPI ID from passive TFO socket" cleanup_ns exit 1 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 0f5640d8dc7f..e788b84551ca 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -439,6 +439,8 @@ TEST_F(tls, sendfile) EXPECT_GE(filefd, 0); fstat(filefd, &st); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); + + close(filefd); } TEST_F(tls, send_then_sendfile) @@ -460,6 +462,9 @@ TEST_F(tls, send_then_sendfile) EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); + + free(buf); + close(filefd); } static void chunked_sendfile(struct __test_metadata *_metadata, @@ -2770,6 +2775,22 @@ TEST_F(tls_err, poll_partial_rec_async) } } +/* Use OOB+large send to trigger copy mode due to memory pressure. + * OOB causes a short read. + */ +TEST_F(tls_err, oob_pressure) +{ + char buf[1<<16]; + int i; + + memrnd(buf, sizeof(buf)); + + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + for (i = 0; i < 64; i++) + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..dbb34c7e09ce 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -203,34 +181,137 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are membmer in a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. # -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. cleanup_traceroute() { @@ -250,6 +331,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,35 +350,107 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" - return - fi - setup_traceroute - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" cleanup_traceroute } ################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. +# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ # Run tests run_tests() { run_traceroute6 + run_traceroute6_vrf run_traceroute + run_traceroute_vrf } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +460,9 @@ do esac done +require_command traceroute6 +require_command traceroute + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index e7cb8c678bde..db481af9b6b3 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -18,29 +18,29 @@ setup_prepare() { local port - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 for port in d1 d2 d3; do - ip_link_add $port type veth peer name r$port - ip_link_set_up $port - ip_link_set_up r$port - ip_link_set_master $port br + adf_ip_link_add $port type veth peer name r$port + adf_ip_link_set_up $port + adf_ip_link_set_up r$port + adf_ip_link_set_master $port br done - bridge_vlan_add vid 11 dev br self - bridge_vlan_add vid 11 dev d1 master + adf_bridge_vlan_add vid 11 dev br self + adf_bridge_vlan_add vid 11 dev d1 master - bridge_vlan_add vid 12 dev br self - bridge_vlan_add vid 12 dev d2 master + adf_bridge_vlan_add vid 12 dev br self + adf_bridge_vlan_add vid 12 dev d2 master - bridge_vlan_add vid 13 dev br self - bridge_vlan_add vid 13 dev d1 master - bridge_vlan_add vid 13 dev d2 master + adf_bridge_vlan_add vid 13 dev br self + adf_bridge_vlan_add vid 13 dev d1 master + adf_bridge_vlan_add vid 13 dev d2 master - bridge_vlan_add vid 14 dev br self - bridge_vlan_add vid 14 dev d1 master - bridge_vlan_add vid 14 dev d2 master - bridge_vlan_add vid 14 dev d3 master + adf_bridge_vlan_add vid 14 dev br self + adf_bridge_vlan_add vid 14 dev d1 master + adf_bridge_vlan_add vid 14 dev d2 master + adf_bridge_vlan_add vid 14 dev d3 master } operstate_is() @@ -74,7 +74,7 @@ add_one_vlan() local link=$1; shift local id=$1; shift - ip_link_add $link.$id link $link type vlan id $id "$@" + adf_ip_link_add $link.$id link $link type vlan id $id "$@" } add_vlans() @@ -98,7 +98,7 @@ down_netdevs() local dev for dev in "$@"; do - ip_link_set_down $dev + adf_ip_link_set_down $dev done } @@ -207,13 +207,13 @@ test_binding_toggle_off() do_test_binding_off : "on->off" } -dfr_set_binding_on() +adf_set_binding_on() { set_vlans type vlan bridge_binding on defer set_vlans type vlan bridge_binding off } -dfr_set_binding_off() +adf_set_binding_off() { set_vlans type vlan bridge_binding off defer set_vlans type vlan bridge_binding on @@ -223,14 +223,14 @@ test_binding_toggle_on_when_lower_down() { add_vlans bridge_binding off set_vlans up - do_test_binding_on dfr_set_binding_on "off->on when lower down" + do_test_binding_on adf_set_binding_on "off->on when lower down" } test_binding_toggle_off_when_lower_down() { add_vlans bridge_binding on set_vlans up - do_test_binding_off dfr_set_binding_off "on->off when lower down" + do_test_binding_off adf_set_binding_off "on->off when lower down" } test_binding_toggle_on_when_upper_down() diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index e907c2751956..793a2fc33d9f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -5,10 +5,11 @@ # Inputs: # # YNL_GENS: families we need in the selftests -# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL # YNL_GEN_FILES: TEST_GEN_FILES which need YNL -YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \ + $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS)) YNL_SPECS := \ $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 0fb759ba992e..330e000baeb1 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -262,19 +262,22 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ printf("\nSee all results in %s\n", ARGV[1]); }' +# Execute the toplevel kernel Makefile +KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) LLVM= + help: @echo "Supported targets under selftests/nolibc:" @echo " all call the \"run\" target below" @echo " help this help" @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " nolibc-test build the executable (uses \$$CC or \$$CROSS_COMPILE)" @echo " libc-test build an executable using the compiler's default libc instead" @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" @echo " initramfs prepare the initramfs tree with nolibc-test" @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel (uses \$$XARCH)" - @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH, \$$CROSS_COMPILE)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH, \$$CROSS_COMPILE)" @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" @echo " clean clean the sysroot, initramfs, build and output files" @@ -340,17 +343,17 @@ initramfs: nolibc-test $(Q)cp nolibc-test initramfs/init defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) + $(Q)$(KBUILD_MAKE) $(DEFCONFIG) $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ - $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + $(KBUILD_MAKE) olddefconfig < /dev/null; \ fi kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) < /dev/null kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel run: kernel initramfs.cpio diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a297ee0d6d07..29de21595fc9 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -196,8 +196,8 @@ int expect_zr(int expr, int llen) } -#define EXPECT_NZ(cond, expr, val) \ - do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0) +#define EXPECT_NZ(cond, expr) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen); } while (0) static __attribute__((unused)) int expect_nz(int expr, int llen) @@ -686,7 +686,6 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); -/* constructors validate that they are executed in definition order */ __attribute__((constructor)) static void constructor1(void) { @@ -1334,6 +1333,7 @@ int run_syscall(int min, int max) CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break; + CASE_TEST(clock_nanosleep); ts.tv_nsec = -1; EXPECT_EQ(1, EINVAL, clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL)); break; CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index e03fe1b9bba2..b3a72f0ac522 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -17,8 +17,6 @@ #include "../kselftest_harness.h" -#define __maybe_unused __attribute__((__unused__)) - static int sigio_count; static void handle_sigio(int signum __maybe_unused, diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index 6133524710f7..cf7cc0ce0248 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,6 +4,5 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_TIME_NS=y -CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_CGROUPS=y CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h index 4efa6314bd96..864f0c9f1afc 100644 --- a/tools/testing/selftests/powerpc/include/instructions.h +++ b/tools/testing/selftests/powerpc/include/instructions.h @@ -67,7 +67,7 @@ static inline int paste_last(void *i) #define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) /* This defines the prefixed load/store instructions */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define stringify_in_c(...) __VA_ARGS__ #else # define __stringify_in_c(...) #__VA_ARGS__ diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 243f4537a670..9c9735570abf 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -19,6 +19,7 @@ /proc-tid0 /proc-uptime-001 /proc-uptime-002 +/proc-pidns /read /self /setns-dcache diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 2a9547630115..a7de2bb6d8be 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -29,5 +29,6 @@ TEST_GEN_PROGS += setns-sysvipc TEST_GEN_PROGS += thread-self TEST_GEN_PROGS += proc-multiple-procfs TEST_GEN_PROGS += proc-fsconfig-hidepid +TEST_GEN_PROGS += proc-pidns include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 94bba4553130..a546475db550 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -32,6 +32,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> @@ -317,6 +319,25 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, strcmp(restored_first_line->text, self->first_line.text) == 0; } +static bool query_addr_at(int maps_fd, void *addr, + unsigned long *vma_start, unsigned long *vma_end) +{ + struct procmap_query q; + + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + /* Find the VMA at the split address */ + q.query_addr = (unsigned long long)addr; + q.query_flags = 0; + if (ioctl(maps_fd, PROCMAP_QUERY, &q)) + return false; + + *vma_start = q.vma_start; + *vma_end = q.vma_end; + + return true; +} + static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) { return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, @@ -559,6 +580,8 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) do { bool last_line_changed; bool first_line_changed; + unsigned long vma_start; + unsigned long vma_end; ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); @@ -595,6 +618,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; ASSERT_EQ(last_line_changed, first_line_changed); + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma at the split address can be either the same as + * original one (if read before the split) or the same as the + * first line in the second page (if read after the split). + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end == self->last_line.end_addr) || + (vma_start == split_first_line.start_addr && + vma_end == split_first_line.end_addr)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -636,6 +672,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after shrinking it */ @@ -662,6 +701,16 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) "Expand result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr, &vma_start, &vma_end)); + /* + * The vma should stay at the same address and have either the + * original size of 3 pages or 1 page if read after shrinking. + */ + ASSERT_TRUE(vma_start == self->last_line.start_addr && + (vma_end - vma_start == self->page_size * 3 || + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -703,6 +752,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after remapping it */ @@ -729,6 +781,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) "Remap restore result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma should either stay at the same address and have the + * original size of 3 pages or we should find the remapped vma + * at the remap destination address with size of 1 page. + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end - vma_start == self->page_size * 3) || + (vma_start == self->last_line.start_addr + self->page_size && + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/selftests/proc/proc-pidns.c new file mode 100644 index 000000000000..52500597f951 --- /dev/null +++ b/tools/testing/selftests/proc/proc-pidns.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2025 SUSE LLC. + */ + +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/prctl.h> + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +static int touch(char *path) +{ + int fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644); + if (fd < 0) + return -1; + return close(fd); +} + +FIXTURE(ns) +{ + int host_mntns, host_pidns; + int dummy_pidns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); + + /* + * Create a proper tmpfs that we can use and will disappear once we + * leave this mntns. + */ + ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL)); + + /* + * Create a pidns we can use for later tests. We need to fork off a + * child so that we get a usable nsfd that we can bind-mount and open. + */ + ASSERT_SUCCESS(mkdir("/tmp/dummy", 0755)); + ASSERT_SUCCESS(touch("/tmp/dummy/pidns")); + ASSERT_SUCCESS(mkdir("/tmp/dummy/proc", 0755)); + + self->host_pidns = open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_pidns); + ASSERT_SUCCESS(unshare(CLONE_NEWPID)); + + pid_t pid = fork(); + ASSERT_SUCCESS(pid); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy/pidns", NULL, MS_BIND, NULL)); + ASSERT_SUCCESS(mount("proc", "/tmp/dummy/proc", "proc", 0, NULL)); + exit(0); + } + + int wstatus; + ASSERT_EQ(waitpid(pid, &wstatus, 0), pid); + ASSERT_TRUE(WIFEXITED(wstatus)); + ASSERT_EQ(WEXITSTATUS(wstatus), 0); + + ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID)); + + self->dummy_pidns = open("/tmp/dummy/pidns", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->dummy_pidns); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); + + ASSERT_SUCCESS(close(self->host_pidns)); + ASSERT_SUCCESS(close(self->dummy_pidns)); +} + +TEST_F(ns, pidns_mount_string_path) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=/proc/self/ns/pid")); + ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK)); + + ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=/tmp/dummy/pidns")); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/1/", X_OK)); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/self/", X_OK)); +} + +TEST_F(ns, pidns_fsconfig_string_path) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_fsconfig_fd) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_remount) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, "")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); + + ASSERT_ERRNO_EQ(-EBUSY, mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns=/tmp/dummy/pidns")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_string_path) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_fd) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/README b/tools/testing/selftests/riscv/README new file mode 100644 index 000000000000..443da395da68 --- /dev/null +++ b/tools/testing/selftests/riscv/README @@ -0,0 +1,24 @@ +KSelfTest RISC-V +================ + +- These tests are riscv specific and so not built or run but just skipped + completely when env-variable ARCH is found to be different than 'riscv'. + +- Holding true the above, RISC-V KSFT tests can be run within the + KSelfTest framework using standard Linux top-level-makefile targets: + + $ make TARGETS=riscv kselftest-clean + $ make TARGETS=riscv kselftest + + or + + $ make -C tools/testing/selftests TARGETS=riscv \ + INSTALL_PATH=<your-installation-path> install + + or, alternatively, only specific riscv/ subtargets can be picked: + + $ make -C tools/testing/selftests TARGETS=riscv RISCV_SUBTARGETS="mm vector" \ + INSTALL_PATH=<your-installation-path> install + + Further details on building and running KSFT can be found in: + Documentation/dev-tools/kselftest.rst diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 663a9cef1952..dcac5cbe7933 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -40,9 +40,9 @@ * Define weak versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. */ -__weak ptrdiff_t __rseq_offset; -__weak unsigned int __rseq_size; -__weak unsigned int __rseq_flags; +extern __weak ptrdiff_t __rseq_offset; +extern __weak unsigned int __rseq_size; +extern __weak unsigned int __rseq_flags; static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; static const unsigned int *libc_rseq_size_p = &__rseq_size; @@ -209,7 +209,7 @@ void rseq_init(void) * libc not having registered a restartable sequence. Try to find the * symbols if that's the case. */ - if (!*libc_rseq_size_p) { + if (!libc_rseq_size_p || !*libc_rseq_size_p) { libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 61acbd45ffaa..874f17763536 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -24,6 +24,7 @@ #include <linux/filter.h> #include <sys/prctl.h> #include <sys/ptrace.h> +#include <sys/time.h> #include <sys/user.h> #include <linux/prctl.h> #include <linux/ptrace.h> @@ -73,6 +74,14 @@ #define noinline __attribute__((noinline)) #endif +#ifndef __nocf_check +#define __nocf_check __attribute__((nocf_check)) +#endif + +#ifndef __naked +#define __naked __attribute__((__naked__)) +#endif + #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -3547,6 +3556,10 @@ static void signal_handler(int signal) perror("write from signal"); } +static void signal_handler_nop(int signal) +{ +} + TEST(user_notification_signal) { pid_t pid; @@ -4819,6 +4832,132 @@ TEST(user_notification_wait_killable_fatal) EXPECT_EQ(SIGTERM, WTERMSIG(status)); } +/* Ensure signals after the reply do not interrupt */ +TEST(user_notification_wait_killable_after_reply) +{ + int i, max_iter = 100000; + int listener, status; + int pipe_fds[2]; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall( + __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + /* + * Used to count invocations. One token is transferred from the child + * to the parent per syscall invocation, the parent tries to take + * one token per successful RECV. If the syscall is restarted after + * RECV the parent will try to get two tokens while the child only + * provided one. + */ + ASSERT_EQ(pipe(pipe_fds), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct sigaction new_action = { + .sa_handler = signal_handler_nop, + .sa_flags = SA_RESTART, + }; + struct itimerval timer = { + .it_value = { .tv_usec = 1000 }, + .it_interval = { .tv_usec = 1000 }, + }; + char c = 'a'; + + close(pipe_fds[0]); + + /* Setup the sigaction with SA_RESTART */ + if (sigaction(SIGALRM, &new_action, NULL)) { + perror("sigaction"); + exit(1); + } + + /* + * Kill with SIGALRM repeatedly, to try to hit the race when + * handling the syscall. + */ + if (setitimer(ITIMER_REAL, &timer, NULL) < 0) + perror("setitimer"); + + for (i = 0; i < max_iter; ++i) { + int fd; + + /* Send one token per iteration to catch repeats. */ + if (write(pipe_fds[1], &c, sizeof(c)) != 1) { + perror("write"); + exit(1); + } + + fd = syscall(__NR_dup, 0); + if (fd < 0) { + perror("dup"); + exit(1); + } + close(fd); + } + + exit(0); + } + + close(pipe_fds[1]); + + for (i = 0; i < max_iter; ++i) { + struct seccomp_notif req = {}; + struct seccomp_notif_addfd addfd = {}; + struct pollfd pfd = { + .fd = pipe_fds[0], + .events = POLLIN, + }; + char c; + + /* + * Try to receive one token. If it failed, one child syscall + * was restarted after RECV and needed to be handled twice. + */ + ASSERT_EQ(poll(&pfd, 1, 1000), 1) + kill(pid, SIGKILL); + + ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) + kill(pid, SIGKILL); + + /* + * Get the notification, reply to it as fast as possible to test + * whether the child wrongly skips going into the non-preemptible + * (TASK_KILLABLE) state. + */ + do + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ + ASSERT_EQ(ret, 0) + kill(pid, SIGKILL); + + addfd.id = req.id; + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + addfd.srcfd = 0; + ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) + kill(pid, SIGKILL); + } + + /* + * Wait for the process to exit, and make sure the process terminated + * with a zero exit code.. + */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + struct tsync_vs_thread_leader_args { pthread_t leader; }; @@ -4896,7 +5035,36 @@ TEST(tsync_vs_dead_thread_leader) EXPECT_EQ(0, status); } -noinline int probed(void) +#ifdef __x86_64__ + +/* + * We need naked probed_uprobe function. Using __nocf_check + * check to skip possible endbr64 instruction and ignoring + * -Wattributes, otherwise the compilation might fail. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" + +__naked __nocf_check noinline int probed_uprobe(void) +{ + /* + * Optimized uprobe is possible only on top of nop5 instruction. + */ + asm volatile (" \n" + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" + "ret \n" + ); +} +#pragma GCC diagnostic pop + +#else +noinline int probed_uprobe(void) +{ + return 1; +} +#endif + +noinline int probed_uretprobe(void) { return 1; } @@ -4949,35 +5117,46 @@ static ssize_t get_uprobe_offset(const void *addr) return found ? (uintptr_t)addr - start + base : -1; } -FIXTURE(URETPROBE) { +FIXTURE(UPROBE) { int fd; }; -FIXTURE_VARIANT(URETPROBE) { +FIXTURE_VARIANT(UPROBE) { /* - * All of the URETPROBE behaviors can be tested with either - * uretprobe attached or not + * All of the U(RET)PROBE behaviors can be tested with either + * u(ret)probe attached or not */ bool attach; + /* + * Test both uprobe and uretprobe. + */ + bool uretprobe; +}; + +FIXTURE_VARIANT_ADD(UPROBE, not_attached) { + .attach = false, + .uretprobe = false, }; -FIXTURE_VARIANT_ADD(URETPROBE, attached) { +FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) { .attach = true, + .uretprobe = false, }; -FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { - .attach = false, +FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) { + .attach = true, + .uretprobe = true, }; -FIXTURE_SETUP(URETPROBE) +FIXTURE_SETUP(UPROBE) { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; ssize_t offset; int type, bit; -#ifndef __NR_uretprobe - SKIP(return, "__NR_uretprobe syscall not defined"); +#if !defined(__NR_uprobe) || !defined(__NR_uretprobe) + SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined"); #endif if (!variant->attach) @@ -4987,12 +5166,17 @@ FIXTURE_SETUP(URETPROBE) type = determine_uprobe_perf_type(); ASSERT_GE(type, 0); - bit = determine_uprobe_retprobe_bit(); - ASSERT_GE(bit, 0); - offset = get_uprobe_offset(probed); + + if (variant->uretprobe) { + bit = determine_uprobe_retprobe_bit(); + ASSERT_GE(bit, 0); + } + + offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe); ASSERT_GE(offset, 0); - attr.config |= 1 << bit; + if (variant->uretprobe) + attr.config |= 1 << bit; attr.size = attr_sz; attr.type = type; attr.config1 = ptr_to_u64("/proc/self/exe"); @@ -5003,7 +5187,7 @@ FIXTURE_SETUP(URETPROBE) PERF_FLAG_FD_CLOEXEC); } -FIXTURE_TEARDOWN(URETPROBE) +FIXTURE_TEARDOWN(UPROBE) { /* we could call close(self->fd), but we'd need extra filter for * that and since we are calling _exit right away.. @@ -5017,11 +5201,17 @@ static int run_probed_with_filter(struct sock_fprog *prog) return -1; } - probed(); + /* + * Uprobe is optimized after first hit, so let's hit twice. + */ + probed_uprobe(); + probed_uprobe(); + + probed_uretprobe(); return 0; } -TEST_F(URETPROBE, uretprobe_default_allow) +TEST_F(UPROBE, uprobe_default_allow) { struct sock_filter filter[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), @@ -5034,7 +5224,7 @@ TEST_F(URETPROBE, uretprobe_default_allow) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_default_block) +TEST_F(UPROBE, uprobe_default_block) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -5051,11 +5241,14 @@ TEST_F(URETPROBE, uretprobe_default_block) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) +TEST_F(UPROBE, uprobe_block_syscall) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), +#ifdef __NR_uprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2), +#endif #ifdef __NR_uretprobe BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), #endif @@ -5070,11 +5263,14 @@ TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) +TEST_F(UPROBE, uprobe_default_block_with_syscall) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), +#ifdef __NR_uprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0), +#endif #ifdef __NR_uretprobe BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), #endif diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 5596f4df0e9f..b2cc6ea74450 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -879,7 +879,7 @@ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200", + "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu (4Gb|4096Mb) pkts_rate 1000 pkts_burst 200", "matchCount": "1", "teardown": [ "$TC actions flush action police" diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index ba58589a1145..ca2bd03154e4 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -144,6 +144,8 @@ int main(int argc, char **argv) ret = sscanf(index_str, "%d", &index); if (ret < 0) break; + + index &= 0x0f; if (index > WORKLOAD_TYPE_MAX_INDEX) printf("Invalid workload type index\n"); else diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 5d7f4ecfb816..770269efe42a 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -20,6 +20,7 @@ TEST_PROGS += test_generic_09.sh TEST_PROGS += test_generic_10.sh TEST_PROGS += test_generic_11.sh TEST_PROGS += test_generic_12.sh +TEST_PROGS += test_generic_13.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index b71faba86c3b..6b8123c12a7a 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1384,21 +1384,23 @@ static int cmd_dev_list(struct dev_ctx *ctx) static int cmd_dev_get_features(void) { #define const_ilog2(x) (63 - __builtin_clzll(x)) +#define FEAT_NAME(f) [const_ilog2(f)] = #f static const char *feat_map[] = { - [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY", - [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK", - [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA", - [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY", - [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE", - [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV", - [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE", - [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", - [const_ilog2(UBLK_F_ZONED)] = "ZONED", - [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", - [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", - [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", - [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", - [const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON", + FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY), + FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK), + FEAT_NAME(UBLK_F_NEED_GET_DATA), + FEAT_NAME(UBLK_F_USER_RECOVERY), + FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE), + FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV), + FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE), + FEAT_NAME(UBLK_F_USER_COPY), + FEAT_NAME(UBLK_F_ZONED), + FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO), + FEAT_NAME(UBLK_F_UPDATE_SIZE), + FEAT_NAME(UBLK_F_AUTO_BUF_REG), + FEAT_NAME(UBLK_F_QUIESCE), + FEAT_NAME(UBLK_F_PER_IO_DAEMON), + FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON), }; struct ublk_dev *dev; __u64 features = 0; @@ -1425,7 +1427,7 @@ static int cmd_dev_get_features(void) feat = feat_map[i]; else feat = "unknown"; - printf("\t%-20s: 0x%llx\n", feat, 1ULL << i); + printf("0x%-16llx: %s\n", 1ULL << i, feat); } } diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh index 9227a208ba53..21a31cd5491a 100755 --- a/tools/testing/selftests/ublk/test_generic_01.sh +++ b/tools/testing/selftests/ublk/test_generic_01.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh index 3e80121e3bf5..12920768b1a0 100755 --- a/tools/testing/selftests/ublk/test_generic_02.sh +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order for MQ" dev_id=$(_add_ublk_dev -t null -q 2) diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh index 7abbb00d251d..b4046201b4d9 100755 --- a/tools/testing/selftests/ublk/test_generic_12.sh +++ b/tools/testing/selftests/ublk/test_generic_12.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "do imbalanced load, it should be balanced over I/O threads" NTHREADS=6 diff --git a/tools/testing/selftests/ublk/test_generic_13.sh b/tools/testing/selftests/ublk/test_generic_13.sh new file mode 100755 index 000000000000..b7aa90b1cb74 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_13.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_13" +ERR_CODE=0 + +_prep_test "null" "check that feature list is complete" + +if ${UBLK_PROG} features | grep -q unknown; then + echo "# unknown feature detected!" + echo "# did you add a feature and forget to update feat_map in kublk?" + echo "# this failure is expected if running an older test suite against" + echo "# a newer kernel with new features added" + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh index a34203f72668..c2cb8f7a09fe 100755 --- a/tools/testing/selftests/ublk/test_null_01.sh +++ b/tools/testing/selftests/ublk/test_null_01.sh @@ -6,6 +6,10 @@ TID="null_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh index 5633ca876655..8accd35beb55 100755 --- a/tools/testing/selftests/ublk/test_null_02.sh +++ b/tools/testing/selftests/ublk/test_null_02.sh @@ -6,6 +6,10 @@ TID="null_02" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test with zero copy" dev_id=$(_add_ublk_dev -t null -z) diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 566cfd90d192..274295061042 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -5,6 +5,10 @@ TID="stress_05" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + run_io_and_remove() { local size=$1 diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h index 36545d1567f1..a852e0b7153e 100644 --- a/tools/testing/selftests/ublk/utils.h +++ b/tools/testing/selftests/ublk/utils.h @@ -2,8 +2,6 @@ #ifndef KUBLK_UTILS_H #define KUBLK_UTILS_H -#define __maybe_unused __attribute__((unused)) - #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 30d5c8f0e5c7..ba322a353aff 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test vdso_test_abi -vdso_test_clock_getres vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 918a2caa070e..e361aca22a74 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -4,7 +4,6 @@ include ../../../scripts/Makefile.arch TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu TEST_GEN_PROGS += vdso_test_abi -TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif @@ -29,7 +28,6 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c -$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers $(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h index bb237d771051..e7205584cbdc 100644 --- a/tools/testing/selftests/vDSO/vdso_call.h +++ b/tools/testing/selftests/vDSO/vdso_call.h @@ -44,7 +44,6 @@ register long _r6 asm ("r6"); \ register long _r7 asm ("r7"); \ register long _r8 asm ("r8"); \ - register long _rval asm ("r3"); \ \ LOADARGS_##nr(fn, args); \ \ @@ -54,13 +53,13 @@ " bns+ 1f\n" \ " neg 3, 3\n" \ "1:" \ - : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \ + : "+r" (_r0), "+r" (_r3), "+r" (_r4), "+r" (_r5), \ "+r" (_r6), "+r" (_r7), "+r" (_r8) \ - : "r" (_rval) \ + : \ : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \ "cr6", "cr7", "xer", "lr", "ctr", "memory" \ ); \ - _rval; \ + _r3; \ }) #else diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index a54424e2336f..238d609a457a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -26,24 +26,31 @@ static const char *version; static const char **name; +/* The same as struct __kernel_timespec */ +struct vdso_timespec64 { + uint64_t tv_sec; + uint64_t tv_nsec; +}; + typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz); typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); +typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", +static const char * const vdso_clock_name[] = { + [CLOCK_REALTIME] = "CLOCK_REALTIME", + [CLOCK_MONOTONIC] = "CLOCK_MONOTONIC", + [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID", + [CLOCK_THREAD_CPUTIME_ID] = "CLOCK_THREAD_CPUTIME_ID", + [CLOCK_MONOTONIC_RAW] = "CLOCK_MONOTONIC_RAW", + [CLOCK_REALTIME_COARSE] = "CLOCK_REALTIME_COARSE", + [CLOCK_MONOTONIC_COARSE] = "CLOCK_MONOTONIC_COARSE", + [CLOCK_BOOTTIME] = "CLOCK_BOOTTIME", + [CLOCK_REALTIME_ALARM] = "CLOCK_REALTIME_ALARM", + [CLOCK_BOOTTIME_ALARM] = "CLOCK_BOOTTIME_ALARM", + [10 /* CLOCK_SGI_CYCLE */] = "CLOCK_SGI_CYCLE", + [CLOCK_TAI] = "CLOCK_TAI", }; static void vdso_test_gettimeofday(void) @@ -70,6 +77,33 @@ static void vdso_test_gettimeofday(void) } } +static void vdso_test_clock_gettime64(clockid_t clk_id) +{ + /* Find clock_gettime64. */ + vdso_clock_gettime64_t vdso_clock_gettime64 = + (vdso_clock_gettime64_t)vdso_sym(version, name[5]); + + if (!vdso_clock_gettime64) { + ksft_print_msg("Couldn't find %s\n", name[5]); + ksft_test_result_skip("%s %s\n", name[5], + vdso_clock_name[clk_id]); + return; + } + + struct vdso_timespec64 ts; + long ret = VDSO_CALL(vdso_clock_gettime64, 2, clk_id, &ts); + + if (ret == 0) { + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } else { + ksft_test_result_fail("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } +} + static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. */ @@ -171,23 +205,23 @@ static inline void vdso_test_clock(clockid_t clock_id) ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]); vdso_test_clock_gettime(clock_id); + vdso_test_clock_gettime64(clock_id); vdso_test_clock_getres(clock_id); } -#define VDSO_TEST_PLAN 16 +#define VDSO_TEST_PLAN 29 int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); ksft_print_header(); - ksft_set_plan(VDSO_TEST_PLAN); - if (!sysinfo_ehdr) { - ksft_print_msg("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present!\n"); + + ksft_set_plan(VDSO_TEST_PLAN); version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; @@ -198,40 +232,17 @@ int main(int argc, char **argv) vdso_test_gettimeofday(); -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif + vdso_test_clock(CLOCK_PROCESS_CPUTIME_ID); + vdso_test_clock(CLOCK_THREAD_CPUTIME_ID); vdso_test_time(); - ksft_print_cnts(); - return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; + ksft_finished(); } diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c deleted file mode 100644 index b5d5f59f725a..000000000000 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * vdso_clock_getres.c: Sample code to test clock_getres. - * Copyright (c) 2019 Arm Ltd. - * - * Compile with: - * gcc -std=gnu99 vdso_clock_getres.c - * - * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit), - * Power (32-bit and 64-bit), S390x (32-bit and 64-bit). - * Might work on other architectures. - */ - -#define _GNU_SOURCE -#include <elf.h> -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <sys/auxv.h> -#include <sys/mman.h> -#include <sys/time.h> -#include <unistd.h> -#include <sys/syscall.h> - -#include "../kselftest.h" - -static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts) -{ - long ret; - - ret = syscall(SYS_clock_getres, _clkid, _ts); - - return ret; -} - -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", -}; - -/* - * This function calls clock_getres in vdso and by system call - * with different values for clock_id. - * - * Example of output: - * - * clock_id: CLOCK_REALTIME [PASS] - * clock_id: CLOCK_BOOTTIME [PASS] - * clock_id: CLOCK_TAI [PASS] - * clock_id: CLOCK_REALTIME_COARSE [PASS] - * clock_id: CLOCK_MONOTONIC [PASS] - * clock_id: CLOCK_MONOTONIC_RAW [PASS] - * clock_id: CLOCK_MONOTONIC_COARSE [PASS] - */ -static inline int vdso_test_clock(unsigned int clock_id) -{ - struct timespec x, y; - - printf("clock_id: %s", vdso_clock_name[clock_id]); - clock_getres(clock_id, &x); - syscall_clock_getres(clock_id, &y); - - if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) { - printf(" [FAIL]\n"); - return KSFT_FAIL; - } - - printf(" [PASS]\n"); - return KSFT_PASS; -} - -int main(int argc, char **argv) -{ - int ret = 0; - -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif - if (ret > 0) - return KSFT_FAIL; - - return KSFT_PASS; -} diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index a1f506ba5578..4f09c5db0c7f 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -332,6 +332,12 @@ int main(int argc, char *argv[]) if (oneshot) goto end; + /* Check if WDIOF_KEEPALIVEPING is supported */ + if (!(info.options & WDIOF_KEEPALIVEPING)) { + printf("WDIOC_KEEPALIVE not supported by this device\n"); + goto end; + } + printf("Watchdog Ticking Away!\n"); /* diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 0a5381717e9f..936b18be07cf 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -20,9 +20,10 @@ CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP_NF_IPTABLES_LEGACY=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y @@ -48,7 +49,6 @@ CONFIG_JUMP_LABEL=y CONFIG_FUTEX=y CONFIG_SHMEM=y CONFIG_SLUB=y -CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SMP=y CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 110b34834a6f..82921c75681c 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -14,7 +14,6 @@ Statistics for individual zram devices are exported through sysfs nodes at Kconfig required: CONFIG_ZRAM=y CONFIG_CRYPTO_LZ4=y -CONFIG_ZPOOL=y CONFIG_ZSMALLOC=y ZRAM Testcases diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 0f97fb0d19e1..8c7257155958 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -16,21 +16,6 @@ int nr_allocated; int preempt_count; int test_verbose; -struct kmem_cache { - pthread_mutex_t lock; - unsigned int size; - unsigned int align; - int nr_objs; - void *objs; - void (*ctor)(void *); - unsigned int non_kernel; - unsigned long nr_allocated; - unsigned long nr_tallocated; - bool exec_callback; - void (*callback)(void *); - void *private; -}; - void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) { cachep->callback = callback; @@ -79,7 +64,8 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, if (!(gfp & __GFP_DIRECT_RECLAIM)) { if (!cachep->non_kernel) { - cachep->exec_callback = true; + if (cachep->callback) + cachep->exec_callback = true; return NULL; } @@ -152,6 +138,12 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) if (kmalloc_verbose) pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); + if (cachep->exec_callback) { + if (cachep->callback) + cachep->callback(cachep->private); + cachep->exec_callback = false; + } + pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) kmem_cache_free_locked(cachep, list[i]); @@ -219,6 +211,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, for (i = 0; i < size; i++) __kmem_cache_free_locked(cachep, p[i]); pthread_mutex_unlock(&cachep->lock); + if (cachep->callback) + cachep->exec_callback = true; return 0; } @@ -234,26 +228,112 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } struct kmem_cache * -kmem_cache_create(const char *name, unsigned int size, unsigned int align, - unsigned int flags, void (*ctor)(void *)) +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + unsigned int flags) { struct kmem_cache *ret = malloc(sizeof(*ret)); pthread_mutex_init(&ret->lock, NULL); ret->size = size; - ret->align = align; + ret->align = args->align; + ret->sheaf_capacity = args->sheaf_capacity; ret->nr_objs = 0; ret->nr_allocated = 0; ret->nr_tallocated = 0; ret->objs = NULL; - ret->ctor = ctor; + ret->ctor = args->ctor; ret->non_kernel = 0; ret->exec_callback = false; ret->callback = NULL; ret->private = NULL; + return ret; } +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) +{ + struct slab_sheaf *sheaf; + unsigned int capacity; + + if (s->exec_callback) { + if (s->callback) + s->callback(s->private); + s->exec_callback = false; + } + + capacity = max(size, s->sheaf_capacity); + + sheaf = calloc(1, sizeof(*sheaf) + sizeof(void *) * capacity); + if (!sheaf) + return NULL; + + sheaf->cache = s; + sheaf->capacity = capacity; + sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects); + if (!sheaf->size) { + free(sheaf); + return NULL; + } + + return sheaf; +} + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size) +{ + struct slab_sheaf *sheaf = *sheafp; + int refill; + + if (sheaf->size >= size) + return 0; + + if (size > sheaf->capacity) { + sheaf = kmem_cache_prefill_sheaf(s, gfp, size); + if (!sheaf) + return -ENOMEM; + + kmem_cache_return_sheaf(s, gfp, *sheafp); + *sheafp = sheaf; + return 0; + } + + refill = kmem_cache_alloc_bulk(s, gfp, size - sheaf->size, + &sheaf->objects[sheaf->size]); + if (!refill) + return -ENOMEM; + + sheaf->size += refill; + return 0; +} + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + if (sheaf->size) + kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); + + free(sheaf); +} + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + void *obj; + + if (sheaf->size == 0) { + printf("Nothing left in sheaf!\n"); + return NULL; + } + + obj = sheaf->objects[--sheaf->size]; + sheaf->objects[sheaf->size] = NULL; + + return obj; +} + /* * Test the test infrastructure for kem_cache_alloc/free and bulk counterparts. */ diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index f67d47d32857..7d0fadef0f11 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ -#define atomic_t int32_t -#define atomic_inc(x) uatomic_inc(x) -#define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) uatomic_set(x, y) +#include <linux/atomic.h> + #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h index dc4d30f3860b..2a1e9a8594a2 100644 --- a/tools/testing/shared/maple-shared.h +++ b/tools/testing/shared/maple-shared.h @@ -10,4 +10,15 @@ #include <time.h> #include "linux/init.h" +void maple_rcu_cb(struct rcu_head *head); +#define rcu_cb maple_rcu_cb + +#define kfree_rcu(_struct, _memb) \ +do { \ + typeof(_struct) _p_struct = (_struct); \ + \ + call_rcu(&((_p_struct)->_memb), rcu_cb); \ +} while(0); + + #endif /* __MAPLE_SHARED_H__ */ diff --git a/tools/testing/shared/maple-shim.c b/tools/testing/shared/maple-shim.c index 640df76f483e..16252ee616c0 100644 --- a/tools/testing/shared/maple-shim.c +++ b/tools/testing/shared/maple-shim.c @@ -3,5 +3,12 @@ /* Very simple shim around the maple tree. */ #include "maple-shared.h" +#include <linux/slab.h> #include "../../../lib/maple_tree.c" + +void maple_rcu_cb(struct rcu_head *head) { + struct maple_node *node = container_of(head, struct maple_node, rcu); + + kmem_cache_free(maple_node_cache, node); +} diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index 923ee2492256..5bcdf26c8a9d 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -1,7 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.arch -CFLAGS += -I../shared -I. -I../../include -I../../../lib -g -Og -Wall \ +CFLAGS += -I../shared -I. -I../../include -I../../arch/$(SRCARCH)/include \ + -I../../../lib -g -Og -Wall \ -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined +CFLAGS += $(EXTRA_CFLAGS) LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS += -lpthread -lurcu LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o @@ -11,6 +14,7 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \ generated/bit-length.h generated/autoconf.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ + ../../arch/$(SRCARCH)/include/asm/*.h \ ../../../include/linux/xarray.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h deleted file mode 100644 index 788c597c4fde..000000000000 --- a/tools/testing/vma/linux/atomic.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef _LINUX_ATOMIC_H -#define _LINUX_ATOMIC_H - -#define atomic_t int32_t -#define atomic_inc(x) uatomic_inc(x) -#define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) uatomic_set(x, y) -#define U8_MAX UCHAR_MAX - -#ifndef atomic_cmpxchg_relaxed -#define atomic_cmpxchg_relaxed uatomic_cmpxchg -#define atomic_cmpxchg_release uatomic_cmpxchg -#endif /* atomic_cmpxchg_relaxed */ - -#endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 3639aa8dd2b0..dc976a285ad2 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -21,11 +21,13 @@ #include <stdlib.h> +#include <linux/atomic.h> #include <linux/list.h> #include <linux/maple_tree.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/refcount.h> +#include <linux/slab.h> extern unsigned long stack_guard_gap; #ifdef CONFIG_MMU @@ -249,6 +251,14 @@ struct mutex {}; #define DEFINE_MUTEX(mutexname) \ struct mutex mutexname = {} +#define DECLARE_BITMAP(name, bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#define NUM_MM_FLAG_BITS (64) +typedef struct { + __private DECLARE_BITMAP(__mm_flags, NUM_MM_FLAG_BITS); +} mm_flags_t; + struct mm_struct { struct maple_tree mm_mt; int map_count; /* number of VMAs */ @@ -260,7 +270,7 @@ struct mm_struct { unsigned long def_flags; - unsigned long flags; /* Must use atomic bitops to access */ + mm_flags_t flags; /* Must use mm_flags_* helpers to access */ }; struct vm_area_struct; @@ -274,13 +284,14 @@ struct vm_area_struct; */ struct vm_area_desc { /* Immutable state. */ - struct mm_struct *mm; + const struct mm_struct *const mm; + struct file *const file; /* May vary from vm_file in stacked callers. */ unsigned long start; unsigned long end; /* Mutable fields. Populated with initial state. */ pgoff_t pgoff; - struct file *file; + struct file *vm_file; vm_flags_t vm_flags; pgprot_t page_prot; @@ -467,13 +478,21 @@ struct vm_operations_struct { struct mempolicy *(*get_policy)(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); #endif +#ifdef CONFIG_FIND_NORMAL_PAGE /* - * Called by vm_normal_page() for special PTEs to find the - * page for @addr. This is useful if the default behavior - * (using pte_page()) would not find the correct page. + * Called by vm_normal_page() for special PTEs in @vma at @addr. This + * allows for returning a "normal" page from vm_normal_page() even + * though the PTE indicates that the "struct page" either does not exist + * or should not be touched: "special". + * + * Do not add new users: this really only works when a "normal" page + * was mapped, but then the PTE got changed to something weird (+ + * marked special) that would not make pte_pfn() identify the originally + * inserted page. */ - struct page *(*find_special_page)(struct vm_area_struct *vma, - unsigned long addr); + struct page *(*find_normal_page)(struct vm_area_struct *vma, + unsigned long addr); +#endif /* CONFIG_FIND_NORMAL_PAGE */ }; struct vm_unmapped_area_info { @@ -509,65 +528,6 @@ struct pagetable_move_control { .len_in = len_, \ } -struct kmem_cache_args { - /** - * @align: The required alignment for the objects. - * - * %0 means no specific alignment is requested. - */ - unsigned int align; - /** - * @useroffset: Usercopy region offset. - * - * %0 is a valid offset, when @usersize is non-%0 - */ - unsigned int useroffset; - /** - * @usersize: Usercopy region size. - * - * %0 means no usercopy region is specified. - */ - unsigned int usersize; - /** - * @freeptr_offset: Custom offset for the free pointer - * in &SLAB_TYPESAFE_BY_RCU caches - * - * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer - * outside of the object. This might cause the object to grow in size. - * Cache creators that have a reason to avoid this can specify a custom - * free pointer offset in their struct where the free pointer will be - * placed. - * - * Note that placing the free pointer inside the object requires the - * caller to ensure that no fields are invalidated that are required to - * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for - * details). - * - * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset - * is specified, %use_freeptr_offset must be set %true. - * - * Note that @ctor currently isn't supported with custom free pointers - * as a @ctor requires an external free pointer. - */ - unsigned int freeptr_offset; - /** - * @use_freeptr_offset: Whether a @freeptr_offset is used. - */ - bool use_freeptr_offset; - /** - * @ctor: A constructor for the objects. - * - * The constructor is invoked for each object in a newly allocated slab - * page. It is the cache user's responsibility to free object in the - * same state as after calling the constructor, or deal appropriately - * with any differences between a freshly constructed and a reallocated - * object. - * - * %NULL means no constructor. - */ - void (*ctor)(void *); -}; - static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); @@ -652,40 +612,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_lock_seq = UINT_MAX; } -struct kmem_cache { - const char *name; - size_t object_size; - struct kmem_cache_args *args; -}; - -static inline struct kmem_cache *__kmem_cache_create(const char *name, - size_t object_size, - struct kmem_cache_args *args) -{ - struct kmem_cache *ret = malloc(sizeof(struct kmem_cache)); - - ret->name = name; - ret->object_size = object_size; - ret->args = args; - - return ret; -} - -#define kmem_cache_create(__name, __object_size, __args, ...) \ - __kmem_cache_create((__name), (__object_size), (__args)) - -static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) -{ - (void)gfpflags; - - return calloc(s->object_size, 1); -} - -static inline void kmem_cache_free(struct kmem_cache *s, void *x) -{ - free(x); -} - /* * These are defined in vma.h, but sadly vm_stat_account() is referenced by * kernel/fork.c, so we have to these broadly available there, and temporarily @@ -842,11 +768,11 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } -static inline void fput(struct file *) +static inline void fput(struct file *file) { } -static inline void mpol_put(struct mempolicy *) +static inline void mpol_put(struct mempolicy *pol) { } @@ -854,15 +780,15 @@ static inline void lru_add_drain(void) { } -static inline void tlb_gather_mmu(struct mmu_gather *, struct mm_struct *) +static inline void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) { } -static inline void update_hiwater_rss(struct mm_struct *) +static inline void update_hiwater_rss(struct mm_struct *mm) { } -static inline void update_hiwater_vm(struct mm_struct *) +static inline void update_hiwater_vm(struct mm_struct *mm) { } @@ -871,36 +797,23 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, unsigned long end_addr, unsigned long tree_end, bool mm_wr_locked) { - (void)tlb; - (void)mas; - (void)vma; - (void)start_addr; - (void)end_addr; - (void)tree_end; - (void)mm_wr_locked; } static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, struct vm_area_struct *vma, unsigned long floor, unsigned long ceiling, bool mm_wr_locked) { - (void)tlb; - (void)mas; - (void)vma; - (void)floor; - (void)ceiling; - (void)mm_wr_locked; } -static inline void mapping_unmap_writable(struct address_space *) +static inline void mapping_unmap_writable(struct address_space *mapping) { } -static inline void flush_dcache_mmap_lock(struct address_space *) +static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } -static inline void tlb_finish_mmu(struct mmu_gather *) +static inline void tlb_finish_mmu(struct mmu_gather *tlb) { } @@ -909,7 +822,7 @@ static inline struct file *get_file(struct file *f) return f; } -static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) +static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } @@ -936,10 +849,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long end, struct vm_area_struct *next) { - (void)vma; - (void)start; - (void)end; - (void)next; } static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} @@ -959,51 +868,48 @@ static inline void vm_acct_memory(long pages) { } -static inline void vma_interval_tree_insert(struct vm_area_struct *, - struct rb_root_cached *) +static inline void vma_interval_tree_insert(struct vm_area_struct *vma, + struct rb_root_cached *rb) { } -static inline void vma_interval_tree_remove(struct vm_area_struct *, - struct rb_root_cached *) +static inline void vma_interval_tree_remove(struct vm_area_struct *vma, + struct rb_root_cached *rb) { } -static inline void flush_dcache_mmap_unlock(struct address_space *) +static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } -static inline void anon_vma_interval_tree_insert(struct anon_vma_chain*, - struct rb_root_cached *) +static inline void anon_vma_interval_tree_insert(struct anon_vma_chain *avc, + struct rb_root_cached *rb) { } -static inline void anon_vma_interval_tree_remove(struct anon_vma_chain*, - struct rb_root_cached *) +static inline void anon_vma_interval_tree_remove(struct anon_vma_chain *avc, + struct rb_root_cached *rb) { } -static inline void uprobe_mmap(struct vm_area_struct *) +static inline void uprobe_mmap(struct vm_area_struct *vma) { } static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - (void)vma; - (void)start; - (void)end; } -static inline void i_mmap_lock_write(struct address_space *) +static inline void i_mmap_lock_write(struct address_space *mapping) { } -static inline void anon_vma_lock_write(struct anon_vma *) +static inline void anon_vma_lock_write(struct anon_vma *anon_vma) { } -static inline void vma_assert_write_locked(struct vm_area_struct *) +static inline void vma_assert_write_locked(struct vm_area_struct *vma) { } @@ -1013,16 +919,16 @@ static inline void unlink_anon_vmas(struct vm_area_struct *vma) vma->anon_vma->was_unlinked = true; } -static inline void anon_vma_unlock_write(struct anon_vma *) +static inline void anon_vma_unlock_write(struct anon_vma *anon_vma) { } -static inline void i_mmap_unlock_write(struct address_space *) +static inline void i_mmap_unlock_write(struct address_space *mapping) { } -static inline void anon_vma_merge(struct vm_area_struct *, - struct vm_area_struct *) +static inline void anon_vma_merge(struct vm_area_struct *vma, + struct vm_area_struct *next) { } @@ -1031,27 +937,22 @@ static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long end, struct list_head *unmaps) { - (void)vma; - (void)start; - (void)end; - (void)unmaps; - return 0; } -static inline void mmap_write_downgrade(struct mm_struct *) +static inline void mmap_write_downgrade(struct mm_struct *mm) { } -static inline void mmap_read_unlock(struct mm_struct *) +static inline void mmap_read_unlock(struct mm_struct *mm) { } -static inline void mmap_write_unlock(struct mm_struct *) +static inline void mmap_write_unlock(struct mm_struct *mm) { } -static inline int mmap_write_lock_killable(struct mm_struct *) +static inline int mmap_write_lock_killable(struct mm_struct *mm) { return 0; } @@ -1060,10 +961,6 @@ static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start, unsigned long end) { - (void)mm; - (void)start; - (void)end; - return true; } @@ -1071,16 +968,13 @@ static inline void arch_unmap(struct mm_struct *mm, unsigned long start, unsigned long end) { - (void)mm; - (void)start; - (void)end; } -static inline void mmap_assert_locked(struct mm_struct *) +static inline void mmap_assert_locked(struct mm_struct *mm) { } -static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) +static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } @@ -1088,63 +982,62 @@ static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) static inline void khugepaged_enter_vma(struct vm_area_struct *vma, vm_flags_t vm_flags) { - (void)vma; - (void)vm_flags; } -static inline bool mapping_can_writeback(struct address_space *) +static inline bool mapping_can_writeback(struct address_space *mapping) { return true; } -static inline bool is_vm_hugetlb_page(struct vm_area_struct *) +static inline bool is_vm_hugetlb_page(struct vm_area_struct *vma) { return false; } -static inline bool vma_soft_dirty_enabled(struct vm_area_struct *) +static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) { return false; } -static inline bool userfaultfd_wp(struct vm_area_struct *) +static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return false; } -static inline void mmap_assert_write_locked(struct mm_struct *) +static inline void mmap_assert_write_locked(struct mm_struct *mm) { } -static inline void mutex_lock(struct mutex *) +static inline void mutex_lock(struct mutex *lock) { } -static inline void mutex_unlock(struct mutex *) +static inline void mutex_unlock(struct mutex *lock) { } -static inline bool mutex_is_locked(struct mutex *) +static inline bool mutex_is_locked(struct mutex *lock) { return true; } -static inline bool signal_pending(void *) +static inline bool signal_pending(void *p) { return false; } -static inline bool is_file_hugepages(struct file *) +static inline bool is_file_hugepages(struct file *file) { return false; } -static inline int security_vm_enough_memory_mm(struct mm_struct *, long) +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { return 0; } -static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) +static inline bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, + unsigned long npages) { return true; } @@ -1169,7 +1062,7 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, vma->__vm_flags &= ~flags; } -static inline int shmem_zero_setup(struct vm_area_struct *) +static inline int shmem_zero_setup(struct vm_area_struct *vma) { return 0; } @@ -1179,20 +1072,20 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma) vma->vm_ops = NULL; } -static inline void ksm_add_vma(struct vm_area_struct *) +static inline void ksm_add_vma(struct vm_area_struct *vma) { } -static inline void perf_event_mmap(struct vm_area_struct *) +static inline void perf_event_mmap(struct vm_area_struct *vma) { } -static inline bool vma_is_dax(struct vm_area_struct *) +static inline bool vma_is_dax(struct vm_area_struct *vma) { return false; } -static inline struct vm_area_struct *get_gate_vma(struct mm_struct *) +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } @@ -1217,16 +1110,16 @@ static inline void vma_set_page_prot(struct vm_area_struct *vma) WRITE_ONCE(vma->vm_page_prot, vm_page_prot); } -static inline bool arch_validate_flags(vm_flags_t) +static inline bool arch_validate_flags(vm_flags_t flags) { return true; } -static inline void vma_close(struct vm_area_struct *) +static inline void vma_close(struct vm_area_struct *vma) { } -static inline int mmap_file(struct file *, struct vm_area_struct *) +static inline int mmap_file(struct file *file, struct vm_area_struct *vma) { return 0; } @@ -1282,8 +1175,8 @@ static inline bool capable(int cap) return true; } -static inline bool mlock_future_ok(struct mm_struct *mm, vm_flags_t vm_flags, - unsigned long bytes) +static inline bool mlock_future_ok(const struct mm_struct *mm, + vm_flags_t vm_flags, unsigned long bytes) { unsigned long locked_pages, limit_pages; @@ -1325,6 +1218,13 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, { } +# define ACCESS_PRIVATE(p, member) ((p)->member) + +static inline bool mm_flags_test(int flag, const struct mm_struct *mm) +{ + return test_bit(flag, ACCESS_PRIVATE(&mm->flags, __mm_flags)); +} + /* * Denies creating a writable executable mapping or gaining executable permissions. * @@ -1355,7 +1255,7 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm, static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { /* If MDWE is disabled, we have nothing to deny. */ - if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + if (mm_flags_test(MMF_HAS_MDWE, current->mm)) return false; /* If the new VMA is not executable, we have nothing to deny. */ @@ -1375,21 +1275,12 @@ static inline bool map_deny_write_exec(unsigned long old, unsigned long new) static inline int mapping_map_writable(struct address_space *mapping) { - int c = atomic_read(&mapping->i_mmap_writable); - - /* Derived from the raw_atomic_inc_unless_negative() implementation. */ - do { - if (c < 0) - return -EPERM; - } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1)); - - return 0; + return atomic_inc_unless_negative(&mapping->i_mmap_writable) ? + 0 : -EPERM; } static inline unsigned long move_page_tables(struct pagetable_move_control *pmc) { - (void)pmc; - return 0; } @@ -1397,67 +1288,61 @@ static inline void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { - (void)tlb; - (void)addr; - (void)end; - (void)floor; - (void)ceiling; } static inline int ksm_execve(struct mm_struct *mm) { - (void)mm; - return 0; } static inline void ksm_exit(struct mm_struct *mm) { - (void)mm; } static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) { - (void)vma; - (void)reset_refcnt; + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); } static inline void vma_numab_state_init(struct vm_area_struct *vma) { - (void)vma; } static inline void vma_numab_state_free(struct vm_area_struct *vma) { - (void)vma; } static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct *new_vma) { - (void)orig_vma; - (void)new_vma; } static inline void free_anon_vma_name(struct vm_area_struct *vma) { - (void)vma; } /* Declared in vma.h. */ static inline void set_vma_from_desc(struct vm_area_struct *vma, struct vm_area_desc *desc); -static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, - struct vm_area_desc *desc); - -static int compat_vma_mmap_prepare(struct file *file, - struct vm_area_struct *vma) +static inline int __compat_vma_mmap_prepare(const struct file_operations *f_op, + struct file *file, struct vm_area_struct *vma) { - struct vm_area_desc desc; + struct vm_area_desc desc = { + .mm = vma->vm_mm, + .file = vma->vm_file, + .start = vma->vm_start, + .end = vma->vm_end, + + .pgoff = vma->vm_pgoff, + .vm_file = vma->vm_file, + .vm_flags = vma->vm_flags, + .page_prot = vma->vm_page_prot, + }; int err; - err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + err = f_op->mmap_prepare(&desc); if (err) return err; set_vma_from_desc(vma, &desc); @@ -1465,6 +1350,12 @@ static int compat_vma_mmap_prepare(struct file *file, return 0; } +static inline int compat_vma_mmap_prepare(struct file *file, + struct vm_area_struct *vma) +{ + return __compat_vma_mmap_prepare(file->f_op, file, vma); +} + /* Did the driver provide valid mmap hook configuration? */ static inline bool can_mmap_file(struct file *file) { @@ -1495,7 +1386,6 @@ static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) { - (void)vma; } static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) @@ -1506,13 +1396,13 @@ static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) fput(file); } -static inline bool shmem_file(struct file *) +static inline bool shmem_file(struct file *file) { return false; } -static inline vm_flags_t ksm_vma_flags(const struct mm_struct *, const struct file *, - vm_flags_t vm_flags) +static inline vm_flags_t ksm_vma_flags(const struct mm_struct *mm, + const struct file *file, vm_flags_t vm_flags) { return vm_flags; } diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 7b861a8e997a..d843643ced6b 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -756,7 +756,6 @@ void setsockopt_ull_check(int fd, int level, int optname, fail: fprintf(stderr, "%s val %llu\n", errmsg, val); exit(EXIT_FAILURE); -; } /* Set "int" socket option and check that it's indeed set */ |