summaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/bpf/config5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c129
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c386
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c4
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/net/Makefile3
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh1
-rw-r--r--tools/testing/selftests/drivers/net/gro.c (renamed from tools/testing/selftests/net/gro.c)5
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py164
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile26
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py174
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py9
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c (renamed from tools/testing/selftests/net/toeplitz.c)72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py9
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py84
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh2
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh116
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py13
-rwxr-xr-xtools/testing/selftests/drivers/net/ring_reconfig.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py57
-rw-r--r--tools/testing/selftests/net/.gitignore9
-rw-r--r--tools/testing/selftests/net/Makefile7
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile1
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c4
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh24
-rw-r--r--tools/testing/selftests/net/busy_poller.c16
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh66
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh100
-rwxr-xr-xtools/testing/selftests/net/gro.sh105
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rw-r--r--tools/testing/selftests/net/lib/Makefile1
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py5
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py105
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py2
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py20
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c10
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh146
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh244
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh58
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh45
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh46
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh3
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh126
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c1
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt6
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt2
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh20
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh45
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/tls.c141
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh313
-rw-r--r--tools/testing/selftests/net/txtimestamp.c2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json28
-rw-r--r--tools/testing/selftests/ublk/kublk.c70
-rw-r--r--tools/testing/selftests/ublk/kublk.h9
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh346
91 files changed, 4226 insertions, 1321 deletions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f2a2fd236ca8..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -126,3 +126,8 @@ CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
CONFIG_TCP_CONG_DCTCP=y
CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES 32
+#define NR_SOCKETS 2
+#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_SINGLE 1024
+#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
+
+struct test_case {
+ char name[8];
+ int family;
+ int type;
+ int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+ long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int server, i, err = 0;
+
+ server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (!ASSERT_GE(server, 0, "start_server_str"))
+ return server;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = connect_to_fd(server, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "connect_to_fd");
+ err = sk[i];
+ break;
+ }
+
+ sk[i + 1] = accept(server, NULL, NULL);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "accept");
+ err = sk[i + 1];
+ break;
+ }
+ }
+
+ close(server);
+
+ return err;
+}
+
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int i, j, err, rcvbuf = BUF_TOTAL;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "start_server");
+ return sk[i];
+ }
+
+ sk[i + 1] = connect_to_fd(sk[i], 0);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
+ return sk[i + 1];
+ }
+
+ err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
+ if (err) {
+ ASSERT_EQ(err, 0, "connect_fd_to_fd");
+ return err;
+ }
+
+ for (j = 0; j < 2; j++) {
+ err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
+ if (err) {
+ ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static long get_memory_allocated(struct test_case *test_case,
+ bool *activated, long *memory_allocated)
+{
+ int sk;
+
+ *activated = true;
+
+ /* AF_INET and AF_INET6 share the same memory_allocated.
+ * tcp_init_sock() is called by AF_INET and AF_INET6,
+ * but udp_lib_init_sock() is inline.
+ */
+ sk = socket(AF_INET, test_case->type, 0);
+ if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
+ return -1;
+
+ close(sk);
+
+ return *memory_allocated;
+}
+
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->tcp_activated,
+ &skel->bss->tcp_memory_allocated);
+}
+
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->udp_activated,
+ &skel->bss->udp_memory_allocated);
+}
+
+static int check_bypass(struct test_case *test_case,
+ struct sk_bypass_prot_mem *skel, bool bypass)
+{
+ char buf[BUF_SINGLE] = {};
+ long memory_allocated[2];
+ int sk[NR_SOCKETS];
+ int err, i, j;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++)
+ sk[i] = -1;
+
+ err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+ if (err)
+ goto close;
+
+ memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
+
+ /* allocate pages >= NR_PAGES */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+ /* Avoid too noisy logs when something failed. */
+ if (bytes != sizeof(buf)) {
+ ASSERT_EQ(bytes, sizeof(buf), "send");
+ if (bytes < 0) {
+ err = bytes;
+ goto drain;
+ }
+ }
+ }
+ }
+
+ memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
+
+ if (bypass)
+ ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
+ else
+ ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");
+
+drain:
+ if (test_case->type == SOCK_DGRAM) {
+ /* UDP starts purging sk->sk_receive_queue after one RCU
+ * grace period, then udp_memory_allocated goes down,
+ * so drain the queue before close().
+ */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+ if (bytes == sizeof(buf))
+ continue;
+ if (bytes != -1 || errno != EAGAIN)
+ PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+ break;
+ }
+ }
+ }
+
+close:
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ if (sk[i] < 0)
+ break;
+
+ close(sk[i]);
+ }
+
+ return err;
+}
+
+static void run_test(struct test_case *test_case)
+{
+ struct sk_bypass_prot_mem *skel;
+ struct nstoken *nstoken;
+ int cgroup, err;
+
+ skel = sk_bypass_prot_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = sk_bypass_prot_mem__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto destroy_skel;
+
+ cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+ if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+ goto destroy_skel;
+
+ err = make_netns("sk_bypass_prot_mem");
+ if (!ASSERT_EQ(err, 0, "make_netns"))
+ goto close_cgroup;
+
+ nstoken = open_netns("sk_bypass_prot_mem");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto remove_netns;
+
+ err = check_bypass(test_case, skel, false);
+ if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
+ goto close_netns;
+
+ skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+ if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ ASSERT_EQ(err, 0, "test_bypass(true by bpf)");
+
+close_netns:
+ close_netns(nstoken);
+remove_netns:
+ remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+ close(cgroup);
+destroy_skel:
+ sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "TCP ",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDP ",
+ .family = AF_INET,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+ {
+ .name = "TCPv6",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDPv6",
+ .family = AF_INET6,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+};
+
+void serial_test_sk_bypass_prot_mem(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (test__start_subtest(test_cases[i].name))
+ run_test(&test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
new file mode 100644
index 000000000000..de22734abc4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/genetlink.h>
+#include "network_helpers.h"
+#include "bpf_smc.skel.h"
+
+#ifndef IPPROTO_SMC
+#define IPPROTO_SMC 256
+#endif
+
+#define CLIENT_IP "127.0.0.1"
+#define SERVER_IP "127.0.1.0"
+#define SERVER_IP_VIA_RISK_PATH "127.0.2.0"
+
+#define SERVICE_1 80
+#define SERVICE_2 443
+#define SERVICE_3 8443
+
+#define TEST_NS "bpf_smc_netns"
+
+static struct netns_obj *test_netns;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+#if defined(__s390x__)
+/* s390x has default seid */
+static bool setup_ueid(void) { return true; }
+static void cleanup_ueid(void) {}
+#else
+enum {
+ SMC_NETLINK_ADD_UEID = 10,
+ SMC_NETLINK_REMOVE_UEID
+};
+
+enum {
+ SMC_NLA_EID_TABLE_UNSPEC,
+ SMC_NLA_EID_TABLE_ENTRY, /* string */
+};
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[1024];
+};
+
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK"
+#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID"
+
+static uint16_t smc_nl_family_id = -1;
+
+static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg = {0};
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = nlmsg_flags;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 1;
+ na = (struct nlattr *)GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *)&msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static bool get_smc_nl_family_id(void)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlattr *nl;
+ int fd, ret;
+ pid_t pid;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "nl_family socket"))
+ return false;
+
+ pid = getpid();
+
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "nl_family bind"))
+ goto fail;
+
+ ret = send_cmd(fd, GENL_ID_CTRL, pid,
+ NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
+ strlen(SMC_GENL_FAMILY_NAME));
+ if (!ASSERT_OK(ret, "nl_family query"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "nl_family response"))
+ goto fail;
+
+ nl = (struct nlattr *)GENLMSG_DATA(&msg);
+ nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
+ if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
+ goto fail;
+
+ smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool smc_ueid(int op)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlmsgerr *err;
+ char test_ueid[32];
+ int fd, ret;
+ pid_t pid;
+
+ /* UEID required */
+ memset(test_ueid, '\x20', sizeof(test_ueid));
+ memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "ueid socket"))
+ return false;
+
+ pid = getpid();
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "ueid bind"))
+ goto fail;
+
+ ret = send_cmd(fd, smc_nl_family_id, pid,
+ NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
+ (void *)test_ueid, sizeof(test_ueid));
+ if (!ASSERT_OK(ret, "ueid cmd"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "ueid response"))
+ goto fail;
+
+ if (msg.n.nlmsg_type == NLMSG_ERROR) {
+ err = NLMSG_DATA(&msg);
+ switch (op) {
+ case SMC_NETLINK_REMOVE_UEID:
+ if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
+ "ueid remove"))
+ goto fail;
+ break;
+ case SMC_NETLINK_ADD_UEID:
+ if (!ASSERT_OK(err->error, "ueid add"))
+ goto fail;
+ break;
+ default:
+ break;
+ }
+ }
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool setup_ueid(void)
+{
+ /* get smc nl id */
+ if (!get_smc_nl_family_id())
+ return false;
+ /* clear old ueid for bpftest */
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+ /* smc-loopback required ueid */
+ return smc_ueid(SMC_NETLINK_ADD_UEID);
+}
+
+static void cleanup_ueid(void)
+{
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+}
+#endif /* __s390x__ */
+
+static bool setup_netns(void)
+{
+ test_netns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
+ goto fail_netns;
+
+ SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
+ SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
+
+ return true;
+fail_ip:
+ netns_free(test_netns);
+fail_netns:
+ return false;
+}
+
+static void cleanup_netns(void)
+{
+ netns_free(test_netns);
+}
+
+static bool setup_smc(void)
+{
+ if (!setup_ueid())
+ return false;
+
+ if (!setup_netns())
+ goto fail_netns;
+
+ return true;
+fail_netns:
+ cleanup_ueid();
+ return false;
+}
+
+static int set_client_addr_cb(int fd, void *opts)
+{
+ const char *src = (const char *)opts;
+ struct sockaddr_in localaddr;
+
+ localaddr.sin_family = AF_INET;
+ localaddr.sin_port = htons(0);
+ localaddr.sin_addr.s_addr = inet_addr(src);
+ return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind");
+}
+
+static void run_link(const char *src, const char *dst, int port)
+{
+ struct network_helper_opts opts = {0};
+ int server, client;
+
+ server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
+ if (!ASSERT_OK_FD(server, "start service_1"))
+ return;
+
+ opts.proto = IPPROTO_TCP;
+ opts.post_socket_cb = set_client_addr_cb;
+ opts.cb_opts = (void *)src;
+
+ client = connect_to_fd_opts(server, &opts);
+ if (!ASSERT_OK_FD(client, "start connect"))
+ goto fail_client;
+
+ close(client);
+fail_client:
+ close(server);
+}
+
+static void block_link(int map_fd, const char *src, const char *dst)
+{
+ struct smc_policy_ip_value val = { .mode = /* block */ 0 };
+ struct smc_policy_ip_key key = {
+ .sip = inet_addr(src),
+ .dip = inet_addr(dst),
+ };
+
+ bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ * +-------------> service_1
+ * link 1 | |
+ * +--------------------> server | link 2
+ * | | V
+ * | +-------------> service_2
+ * | link 3
+ * client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because the mode of this link is
+ * kind of short-link services.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ * unavailable and needs to go through a long timeout before it can fallback
+ * to TCP.
+ * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+ struct bpf_smc *skel;
+ int rc, map_fd;
+
+ skel = bpf_smc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+ return;
+
+ rc = bpf_smc__attach(skel);
+ if (!ASSERT_OK(rc, "bpf_smc__attach"))
+ goto fail;
+
+ map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+ if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+ goto fail;
+
+ /* Mock the process of transparent replacement, since we will modify
+ * protocol to ipproto_smc accropding to it via
+ * fmod_ret/update_socket_protocol.
+ */
+ write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+
+ /* Configure ip strat */
+ block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+ block_link(map_fd, SERVER_IP, SERVER_IP);
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+ /* should go with smc fallback */
+ run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc fallback */
+ run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+ bpf_smc__destroy(skel);
+}
+
+void test_bpf_smc(void)
+{
+ if (!setup_smc()) {
+ printf("setup for smc test failed, test SKIP:\n");
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("topo"))
+ test_topo();
+
+ cleanup_ueid();
+ cleanup_netns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 178292d1251a..ee94c281888a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -124,10 +124,10 @@ static int send_test_packet(int ifindex)
int n, sock = -1;
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
+ /* We use the Ethernet header only to identify the test packet */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ };
memcpy(packet, &eth, sizeof(eth));
memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
@@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd)
__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
int n;
- /* The ethernet header doesn't need to be valid for this test */
- memset(packet, 0, sizeof(struct ethhdr));
+ /* The Ethernet header is mostly not relevant. We use it to identify the
+ * test packet and some BPF helpers we exercise expect to operate on
+ * Ethernet frames carrying IP packets. Pretend that's the case.
+ */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ .h_proto = htons(ETH_P_IP),
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
n = write(tap_fd, packet, sizeof(packet));
@@ -171,31 +179,19 @@ static int write_test_packet(int tap_fd)
return 0;
}
-static void assert_test_result(const struct bpf_map *result_map)
-{
- int err;
- __u32 map_key = 0;
- __u8 map_value[TEST_PAYLOAD_LEN];
-
- err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
- &map_value, TEST_PAYLOAD_LEN, BPF_ANY);
- if (!ASSERT_OK(err, "lookup test_result"))
- return;
-
- ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN,
- "test_result map contains test payload");
-}
-
-static bool clear_test_result(struct bpf_map *result_map)
+static void dump_err_stream(const struct bpf_program *prog)
{
- const __u8 v[sizeof(test_payload)] = {};
- const __u32 k = 0;
- int err;
-
- err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
- ASSERT_OK(err, "update test_result");
+ char buf[512];
+ int ret;
- return err == 0;
+ ret = 0;
+ do {
+ ret = bpf_prog_stream_read(bpf_program__fd(prog),
+ BPF_STREAM_STDERR, buf, sizeof(buf),
+ NULL);
+ if (ret > 0)
+ fwrite(buf, sizeof(buf[0]), ret, stderr);
+ } while (ret > 0);
}
void test_xdp_context_veth(void)
@@ -270,11 +266,14 @@ void test_xdp_context_veth(void)
if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
goto close;
+ skel->bss->test_pass = false;
+
ret = send_test_packet(tx_ifindex);
if (!ASSERT_OK(ret, "send_test_packet"))
goto close;
- assert_test_result(skel->maps.test_result);
+ if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
close:
close_netns(nstoken);
@@ -286,7 +285,7 @@ close:
static void test_tuntap(struct bpf_program *xdp_prog,
struct bpf_program *tc_prio_1_prog,
struct bpf_program *tc_prio_2_prog,
- struct bpf_map *result_map)
+ bool *test_pass)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
@@ -295,8 +294,7 @@ static void test_tuntap(struct bpf_program *xdp_prog,
int tap_ifindex;
int ret;
- if (!clear_test_result(result_map))
- return;
+ *test_pass = false;
ns = netns_new(TAP_NETNS, true);
if (!ASSERT_OK_PTR(ns, "create and open ns"))
@@ -340,7 +338,8 @@ static void test_tuntap(struct bpf_program *xdp_prog,
if (!ASSERT_OK(ret, "write_test_packet"))
goto close;
- assert_test_result(result_map);
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
close:
if (tap_fd >= 0)
@@ -411,7 +410,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog,
if (!ASSERT_OK(ret, "write_test_packet"))
goto close;
- ASSERT_TRUE(*test_pass, "test_pass");
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
close:
if (tap_fd >= 0)
@@ -431,61 +431,82 @@ void test_xdp_context_tuntap(void)
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls,
NULL, /* tc prio 2 */
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_read"))
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls_dynptr_read,
NULL, /* tc prio 2 */
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_slice"))
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls_dynptr_slice,
NULL, /* tc prio 2 */
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_write"))
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
skel->progs.ing_cls_dynptr_write,
skel->progs.ing_cls_dynptr_read,
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_slice_rdwr"))
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
skel->progs.ing_cls_dynptr_slice_rdwr,
skel->progs.ing_cls_dynptr_slice,
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_offset"))
test_tuntap(skel->progs.ing_xdp_zalloc_meta,
skel->progs.ing_cls_dynptr_offset_wr,
skel->progs.ing_cls_dynptr_offset_rd,
- skel->maps.test_result);
+ &skel->bss->test_pass);
if (test__start_subtest("dynptr_offset_oob"))
test_tuntap(skel->progs.ing_xdp,
skel->progs.ing_cls_dynptr_offset_oob,
skel->progs.ing_cls,
- skel->maps.test_result);
- if (test__start_subtest("clone_data_meta_empty_on_data_write"))
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_data_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_data_meta_empty_on_data_write,
+ skel->progs.clone_data_meta_survives_data_write,
&skel->bss->test_pass);
- if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
+ if (test__start_subtest("clone_data_meta_survives_meta_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_data_meta_empty_on_meta_write,
+ skel->progs.clone_data_meta_survives_meta_write,
&skel->bss->test_pass);
- if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
+ if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_dynptr_empty_on_data_slice_write,
+ skel->progs.clone_meta_dynptr_survives_data_slice_write,
&skel->bss->test_pass);
- if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
+ if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_dynptr_empty_on_meta_slice_write,
+ skel->progs.clone_meta_dynptr_survives_meta_slice_write,
&skel->bss->test_pass);
- if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
+ if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
+ skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
&skel->bss->test_pass);
- if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
+ if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
test_tuntap_mirred(skel->progs.ing_xdp,
- skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
+ skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
&skel->bss->test_pass);
+ /* Tests for BPF helpers which touch headroom */
+ if (test__start_subtest("helper_skb_vlan_push_pop"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_vlan_push_pop,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_adjust_room"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_adjust_room,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_head_tail"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_head_tail,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_proto"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_proto,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
test_xdp_meta__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 164640db3a29..b1e509b231cd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_retransmit_timer.expires;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_retransmit_timer.expires;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 591c703f5032..dbc7166aee91 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_retransmit_timer.expires;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_retransmit_timer.expires;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
new file mode 100644
index 000000000000..70d8b08f5914
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+enum {
+ BPF_SMC_LISTEN = 10,
+};
+
+struct smc_sock___local {
+ struct sock sk;
+ struct smc_sock *listen_smc;
+ bool use_fallback;
+} __attribute__((preserve_access_index));
+
+int smc_cnt = 0;
+int fallback_cnt = 0;
+
+SEC("fentry/smc_release")
+int BPF_PROG(bpf_smc_release, struct socket *sock)
+{
+ /* only count from one side (client) */
+ if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
+ return 0;
+ smc_cnt++;
+ return 0;
+}
+
+SEC("fentry/smc_switch_to_fallback")
+int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
+{
+ /* only count from one side (client) */
+ if (smc && !smc->listen_smc)
+ fallback_cnt++;
+ return 0;
+}
+
+/* go with default value if no strat was found */
+bool default_ip_strat_value = true;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct smc_policy_ip_key));
+ __uint(value_size, sizeof(struct smc_policy_ip_value));
+ __uint(max_entries, 128);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} smc_policy_ip SEC(".maps");
+
+static bool smc_check(__u32 src, __u32 dst)
+{
+ struct smc_policy_ip_value *value;
+ struct smc_policy_ip_key key = {
+ .sip = src,
+ .dip = dst,
+ };
+
+ value = bpf_map_lookup_elem(&smc_policy_ip, &key);
+ return value ? value->mode : default_ip_strat_value;
+}
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(smc_run, int family, int type, int protocol)
+{
+ struct task_struct *task;
+
+ if (family != AF_INET && family != AF_INET6)
+ return protocol;
+
+ if ((type & 0xf) != SOCK_STREAM)
+ return protocol;
+
+ if (protocol != 0 && protocol != IPPROTO_TCP)
+ return protocol;
+
+ task = bpf_get_current_task_btf();
+ /* Prevent from affecting other tests */
+ if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+ return protocol;
+
+ return IPPROTO_SMC;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
+ struct inet_request_sock *ireq)
+{
+ return smc_check(ireq->req.__req_common.skc_daddr,
+ ireq->req.__req_common.skc_rcv_saddr);
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
+{
+ return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
+ tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
+}
+
+SEC(".struct_ops")
+struct smc_hs_ctrl linkcheck = {
+ .name = "linkcheck",
+ .syn_option = (void *)bpf_smc_set_tcp_option,
+ .synack_option = (void *)bpf_smc_set_tcp_option_cond,
+};
diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..09a00d11ffcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+extern int tcp_memory_per_cpu_fw_alloc __ksym;
+extern int udp_memory_per_cpu_fw_alloc __ksym;
+
+int nr_cpus;
+bool tcp_activated, udp_activated;
+long tcp_memory_allocated, udp_memory_allocated;
+
+struct sk_prot {
+ long *memory_allocated;
+ int *memory_per_cpu_fw_alloc;
+};
+
+static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
+{
+ int *memory_per_cpu_fw_alloc;
+
+ memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
+ if (memory_per_cpu_fw_alloc)
+ *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;
+
+ return 0;
+}
+
+static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
+{
+ struct sock *sk = bpf_core_cast(_sk, struct sock);
+ struct sk_prot sk_prot_ctx;
+ long memory_allocated;
+
+ /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
+ memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
+
+ sk_prot_ctx.memory_allocated = &memory_allocated;
+ sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
+
+ bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
+
+ return memory_allocated;
+}
+
+static void fentry_init_sock(struct sock *sk, bool *activated,
+ long *memory_allocated, int *memory_per_cpu_fw_alloc)
+{
+ if (!*activated)
+ return;
+
+ *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
+ *activated = false;
+}
+
+SEC("fentry/tcp_init_sock")
+int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &tcp_activated,
+ &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("fentry/udp_init_sock")
+int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &udp_activated,
+ &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("cgroup/sock_create")
+int sock_create(struct bpf_sock *ctx)
+{
+ int err, val = 1;
+
+ err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ val = 0;
+
+ err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ if (val != 1) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 1;
+
+err:
+ bpf_set_retval(err);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index d79cb74b571e..0a0f371a2dec 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -4,6 +4,7 @@
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include "bpf_kfuncs.h"
@@ -11,37 +12,72 @@
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-/* Demonstrates how metadata can be passed from an XDP program to a TC program
- * using bpf_xdp_adjust_meta.
- * For the sake of testing the metadata support in drivers, the XDP program uses
- * a fixed-size payload after the Ethernet header as metadata. The TC program
- * copies the metadata it receives into a map so it can be checked from
- * userspace.
+/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
+ *
+ * The XDP program extracts a fixed-size payload following the Ethernet header
+ * and stores it as packet metadata to test the driver's metadata support. The
+ * TC program then verifies if the passed metadata is correct.
*/
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
- __type(key, __u32);
- __uint(value_size, META_SIZE);
-} test_result SEC(".maps");
-
bool test_pass;
+static const __u8 smac_want[ETH_ALEN] = {
+ 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
+};
+
+static const __u8 meta_want[META_SIZE] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+static bool check_smac(const struct ethhdr *eth)
+{
+ return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
+}
+
+static bool check_metadata(const char *file, int line, __u8 *meta_have)
+{
+ if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return true;
+
+ bpf_stream_printk(BPF_STREAM_STDERR,
+ "FAIL:%s:%d: metadata mismatch\n"
+ " have:\n %pI6\n %pI6\n"
+ " want:\n %pI6\n %pI6\n",
+ file, line,
+ &meta_have[0x00], &meta_have[0x10],
+ &meta_want[0x00], &meta_want[0x10]);
+ return false;
+}
+
+#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
+
+static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
+{
+ __u8 *data_meta = ctx_ptr(skb, data_meta);
+ __u8 *data = ctx_ptr(skb, data);
+
+ return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
+}
+
+#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
- __u8 *data, *data_meta;
- __u32 key = 0;
-
- data_meta = ctx_ptr(ctx, data_meta);
- data = ctx_ptr(ctx, data);
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ __u8 *data = ctx_ptr(ctx, data);
- if (data_meta + META_SIZE > data)
- return TC_ACT_SHOT;
+ if (meta_have + META_SIZE > data)
+ goto out;
- bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
+ if (!check_metadata(meta_have))
+ goto out;
+ test_pass = true;
+out:
return TC_ACT_SHOT;
}
@@ -49,17 +85,17 @@ int ing_cls(struct __sk_buff *ctx)
SEC("tc")
int ing_cls_dynptr_read(struct __sk_buff *ctx)
{
+ __u8 meta_have[META_SIZE];
struct bpf_dynptr meta;
- const __u32 zero = 0;
- __u8 *dst;
-
- dst = bpf_map_lookup_elem(&test_result, &zero);
- if (!dst)
- return TC_ACT_SHOT;
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
return TC_ACT_SHOT;
}
@@ -86,20 +122,18 @@ SEC("tc")
int ing_cls_dynptr_slice(struct __sk_buff *ctx)
{
struct bpf_dynptr meta;
- const __u32 zero = 0;
- __u8 *dst, *src;
-
- dst = bpf_map_lookup_elem(&test_result, &zero);
- if (!dst)
- return TC_ACT_SHOT;
+ __u8 *meta_have;
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
- if (!src)
- return TC_ACT_SHOT;
+ meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
- __builtin_memcpy(dst, src, META_SIZE);
+ if (!check_metadata(meta_have))
+ goto out;
+ test_pass = true;
+out:
return TC_ACT_SHOT;
}
@@ -129,14 +163,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
SEC("tc")
int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
{
- struct bpf_dynptr meta;
const __u32 chunk_len = META_SIZE / 4;
- const __u32 zero = 0;
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
__u8 *dst, *src;
- dst = bpf_map_lookup_elem(&test_result, &zero);
- if (!dst)
- return TC_ACT_SHOT;
+ dst = meta_have;
/* 1. Regular read */
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
@@ -155,9 +187,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
/* 4. Read from a slice starting at an offset */
src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
if (!src)
- return TC_ACT_SHOT;
+ goto out;
__builtin_memcpy(dst, src, chunk_len);
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
return TC_ACT_SHOT;
}
@@ -254,7 +291,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
/* Drop any non-test packets */
if (eth + 1 > ctx_ptr(ctx, data_end))
return XDP_DROP;
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
return XDP_DROP;
ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -294,9 +331,9 @@ int ing_xdp(struct xdp_md *ctx)
/* The Linux networking stack may send other packets on the test
* interface that interfere with the test. Just drop them.
- * The test packets can be recognized by their ethertype of zero.
+ * The test packets can be recognized by their source MAC address.
*/
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
return XDP_DROP;
__builtin_memcpy(data_meta, payload, META_SIZE);
@@ -304,22 +341,25 @@ int ing_xdp(struct xdp_md *ctx)
}
/*
- * Check that skb->data_meta..skb->data is empty if prog writes to packet
- * _payload_ using packet pointers. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _payload_ using packet pointers.
*/
SEC("tc")
-int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
+int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
struct ethhdr *eth = ctx_ptr(ctx, data);
if (eth + 1 > ctx_ptr(ctx, data_end))
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
goto out;
- /* Expect no metadata */
- if (ctx->data_meta != ctx->data)
+ if (!check_metadata(meta_have))
goto out;
/* Packet write to trigger unclone in prologue */
@@ -331,40 +371,44 @@ out:
}
/*
- * Check that skb->data_meta..skb->data is empty if prog writes to packet
- * _metadata_ using packet pointers. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _metadata_ using packet pointers.
*/
SEC("tc")
-int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
+int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
struct ethhdr *eth = ctx_ptr(ctx, data);
- __u8 *md = ctx_ptr(ctx, data_meta);
if (eth + 1 > ctx_ptr(ctx, data_end))
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
goto out;
- if (md + 1 > ctx_ptr(ctx, data)) {
- /* Expect no metadata */
- test_pass = true;
- } else {
- /* Metadata write to trigger unclone in prologue */
- *md = 42;
- }
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ *meta_have = 42;
+
+ test_pass = true;
out:
return TC_ACT_SHOT;
}
/*
- * Check that skb_meta dynptr is writable but empty if prog writes to packet
- * _payload_ using a dynptr slice. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates a r/w slice to packet _payload_.
*/
SEC("tc")
-int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
{
struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
struct ethhdr *eth;
bpf_dynptr_from_skb(ctx, 0, &data);
@@ -372,51 +416,45 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
if (!eth)
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
goto out;
- /* Expect no metadata */
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (!check_metadata(meta_have))
goto out;
- /* Packet write to trigger unclone in prologue */
- eth->h_proto = 42;
-
test_pass = true;
out:
return TC_ACT_SHOT;
}
/*
- * Check that skb_meta dynptr is writable but empty if prog writes to packet
- * _metadata_ using a dynptr slice. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _metadata_.
*/
SEC("tc")
-int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
{
struct bpf_dynptr data, meta;
const struct ethhdr *eth;
- __u8 *md;
+ __u8 *meta_have;
bpf_dynptr_from_skb(ctx, 0, &data);
eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
if (!eth)
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
goto out;
- /* Expect no metadata */
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
goto out;
- /* Metadata write to trigger unclone in prologue */
- bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
- if (md)
- *md = 42;
+ if (!check_metadata(meta_have))
+ goto out;
test_pass = true;
out:
@@ -424,34 +462,40 @@ out:
}
/*
- * Check that skb_meta dynptr is read-only before prog writes to packet payload
- * using dynptr_write helper. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _payload_ using dynptr_write helper and metadata
+ * remains intact before and after the write.
*/
SEC("tc")
-int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
{
struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
const struct ethhdr *eth;
+ int err;
bpf_dynptr_from_skb(ctx, 0, &data);
eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
if (!eth)
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
goto out;
- /* Expect read-only metadata before unclone */
+ /* Expect read-write metadata before unclone */
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
goto out;
/* Helper write to payload will unclone the packet */
bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
- /* Expect no metadata after unclone */
- bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
goto out;
test_pass = true;
@@ -460,31 +504,165 @@ out:
}
/*
- * Check that skb_meta dynptr is read-only if prog writes to packet
- * metadata using dynptr_write helper. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _metadata_ using dynptr_write helper and
+ * metadata remains intact before and after the write.
*/
SEC("tc")
-int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
{
struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
const struct ethhdr *eth;
+ int err;
bpf_dynptr_from_skb(ctx, 0, &data);
eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
if (!eth)
goto out;
/* Ignore non-test packets */
- if (eth->h_proto != 0)
+ if (!check_smac(eth))
goto out;
- /* Expect read-only metadata */
+ /* Expect read-write metadata before unclone */
bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ if (bpf_dynptr_is_rdonly(&meta))
goto out;
- /* Metadata write. Expect failure. */
- bpf_dynptr_from_skb_meta(ctx, 0, &meta);
- if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to metadata will unclone the packet */
+ bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
+ * secondary tag push triggers an actual MAC header modification.
+ */
+ err = bpf_skb_vlan_push(ctx, 0, 42);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_push(ctx, 0, 207);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_adjust_room(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Grow a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Shrink a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Grow a 256 byte hole to trigger head reallocation */
+ err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_head_tail(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Reserve 1 extra in the front for packet data */
+ err = bpf_skb_change_head(ctx, 1, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 256 extra bytes in the front to trigger head reallocation */
+ err = bpf_skb_change_head(ctx, 256, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 4k extra bytes in the back to trigger head reallocation */
+ err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_proto(struct __sk_buff *ctx)
+{
+ int err;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
goto out;
test_pass = true;
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index ed0a4721d8fd..1669a7eeda26 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -926,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
goto out;
}
- err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+ err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr,
args->addrlen, 0);
out:
mutex_unlock(&sock_lock);
@@ -949,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
goto out;
}
- err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+ err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen);
out:
mutex_unlock(&sock_lock);
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index 585ecb4d5dc4..3633c7a3ed65 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
+gro
napi_id_helper
psp_responder
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 71ee69e524d7..f5c71d993750 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -6,10 +6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \
../../net/lib.sh \
TEST_GEN_FILES := \
+ gro \
napi_id_helper \
# end of TEST_GEN_FILES
TEST_PROGS := \
+ gro.py \
hds.py \
napi_id.py \
napi_threaded.py \
@@ -23,6 +25,7 @@ TEST_PROGS := \
ping.py \
psp.py \
queues.py \
+ ring_reconfig.py \
shaper.py \
stats.py \
xdp.py \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
index c4711272fe45..559f300f965a 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -30,6 +30,7 @@ check_connection()
local message=${3}
RET=0
+ sleep 0.25
ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
check_err $? "ping failed"
log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index cfc39f70635d..995b492f5bcb 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -57,7 +57,8 @@
#include <string.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "../../kselftest.h"
+#include "../../net/lib/ksft.h"
#define DPORT 8000
#define SPORT 1500
@@ -1127,6 +1128,8 @@ static void gro_receiver(void)
set_timeout(rxfd);
bind_packetsocket(rxfd);
+ ksft_ready();
+
memset(correct_payload, 0, sizeof(correct_payload));
if (strcmp(testname, "data") == 0) {
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
new file mode 100755
index 000000000000..ba83713bf7b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+GRO (Generic Receive Offload) conformance tests.
+
+Validates that GRO coalescing works correctly by running the gro
+binary in different configurations and checking for correct packet
+coalescing behavior.
+
+Test cases:
+ - data: Data packets with same size/headers and correct seq numbers coalesce
+ - ack: Pure ACK packets do not coalesce
+ - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
+ - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
+ - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
+ - large: Packets larger than GRO_MAX_SIZE don't coalesce
+"""
+
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, KsftXfailEx
+from lib.py import cmd, defer, bkg, ip
+from lib.py import ksft_variants
+
+
+def _resolve_dmac(cfg, ipver):
+ """
+ Find the destination MAC address remote host should use to send packets
+ towards the local host. It may be a router / gateway address.
+ """
+
+ attr = "dmac" + ipver
+ # Cache the response across test cases
+ if hasattr(cfg, attr):
+ return getattr(cfg, attr)
+
+ route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
+ json=True, host=cfg.remote)[0]
+ gw = route.get("gateway")
+ # Local L2 segment, address directly
+ if not gw:
+ setattr(cfg, attr, cfg.dev['address'])
+ return getattr(cfg, attr)
+
+ # ping to make sure neighbor is resolved,
+ # bind to an interface, for v6 the GW is likely link local
+ cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
+
+ neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
+ json=True, host=cfg.remote)[0]
+ setattr(cfg, attr, neigh['lladdr'])
+ return getattr(cfg, attr)
+
+
+def _write_defer_restore(cfg, path, val, defer_undo=False):
+ with open(path, "r", encoding="utf-8") as fp:
+ orig_val = fp.read().strip()
+ if str(val) == orig_val:
+ return
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.write(val)
+ if defer_undo:
+ defer(_write_defer_restore, cfg, path, orig_val)
+
+
+def _set_mtu_restore(dev, mtu, host):
+ if dev['mtu'] < mtu:
+ ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
+ defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
+
+
+def _setup(cfg, test_name):
+ """ Setup hardware loopback mode for GRO testing. """
+
+ if not hasattr(cfg, "bin_remote"):
+ cfg.bin_local = cfg.test_dir / "gro"
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ # "large" test needs at least 4k MTU
+ if test_name == "large":
+ _set_mtu_restore(cfg.dev, 4096, None)
+ _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
+
+ flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+ irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+ _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+ _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+ try:
+ # Disable TSO for local tests
+ cfg.require_nsim() # will raise KsftXfailEx if not running on nsim
+
+ cmd(f"ethtool -K {cfg.ifname} gro on tso off")
+ cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+ except KsftXfailEx:
+ pass
+
+def _gro_variants():
+ """Generator that yields all combinations of protocol and test types."""
+
+ for protocol in ["ipv4", "ipv6", "ipip"]:
+ for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
+ yield protocol, test_name
+
+
+@ksft_variants(_gro_variants())
+def test(cfg, protocol, test_name):
+ """Run a single GRO test with retries."""
+
+ ipver = "6" if protocol[-1] == "6" else "4"
+ cfg.require_ipver(ipver)
+
+ _setup(cfg, test_name)
+
+ base_cmd_args = [
+ f"--{protocol}",
+ f"--dmac {_resolve_dmac(cfg, ipver)}",
+ f"--smac {cfg.remote_dev['address']}",
+ f"--daddr {cfg.addr_v[ipver]}",
+ f"--saddr {cfg.remote_addr_v[ipver]}",
+ f"--test {test_name}",
+ "--verbose"
+ ]
+ base_args = " ".join(base_cmd_args)
+
+ # Each test is run 6 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ max_retries = 6
+ for attempt in range(max_retries):
+ rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
+ tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"
+
+ fail_now = attempt >= max_retries - 1
+
+ with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
+ fail=fail_now) as rx_proc:
+ cmd(tx_cmd, host=cfg.remote)
+
+ if rx_proc.ret == 0:
+ return
+
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+ if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+ ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
+ return
+
+ ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
index 6942bf575497..46540468a775 100644
--- a/tools/testing/selftests/drivers/net/hw/.gitignore
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
iou-zcrx
ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 8133d1a0051c..9c163ba6feee 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -1,10 +1,26 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_GEN_FILES = iou-zcrx
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+ echo -e '#include <liburing.h>\n' \
+ 'void *func = (void *)io_uring_register_ifq;\n' \
+ 'int main() {return 0;}' | \
+ $(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+ $(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
TEST_PROGS = \
csum.py \
devlink_port_split.py \
+ devlink_rate_tc_bw.py \
devmem.py \
ethtool.sh \
ethtool_extended_state.sh \
@@ -21,6 +37,7 @@ TEST_PROGS = \
rss_ctx.py \
rss_flow_label.py \
rss_input_xfrm.py \
+ toeplitz.py \
tso.py \
xsk_reconfig.py \
#
@@ -38,7 +55,10 @@ TEST_INCLUDES := \
#
# YNL files, must be before "include ..lib.mk"
-YNL_GEN_FILES := ncdevmem
+YNL_GEN_FILES := \
+ ncdevmem \
+ toeplitz \
+# end of YNL_GEN_FILES
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
@@ -54,4 +74,6 @@ include ../../../net/ynl.mk
include ../../../net/bpf.mk
+ifeq ($(HAS_IOURING_ZCRX),y)
$(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
index ead6784d1910..4e4faa9275bb 100755
--- a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -21,21 +21,21 @@ Test Cases:
----------
1. test_no_tc_mapping_bandwidth:
- Verifies that without TC mapping, bandwidth is NOT distributed according to
- the configured 80/20 split between TC4 and TC3
- - This test should fail if bandwidth matches the 80/20 split without TC
+ the configured 20/80 split between TC3 and TC4
+ - This test should fail if bandwidth matches the 20/80 split without TC
mapping
- - Expected: Bandwidth should NOT be distributed as 80/20
+ - Expected: Bandwidth should NOT be distributed as 20/80
2. test_tc_mapping_bandwidth:
- Configures TC mapping using mqprio qdisc
- Verifies that with TC mapping, bandwidth IS distributed according to the
- configured 80/20 split between TC3 and TC4
- - Expected: Bandwidth should be distributed as 80/20
+ configured 20/80 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 20/80
Bandwidth Distribution:
----------------------
-- TC3 (VLAN 101): Configured for 80% of total bandwidth
-- TC4 (VLAN 102): Configured for 20% of total bandwidth
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
- Total bandwidth: 1Gbps
- Tolerance: +-12%
@@ -64,43 +64,40 @@ from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from lib.py import NetDrvEpEnv, DevlinkFamily
from lib.py import NlError
from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
class BandwidthValidator:
"""
- Validates bandwidth totals and per-TC shares against expected values
- with a tolerance.
+ Validates total bandwidth and individual shares with tolerance
+ relative to the overall total.
"""
- def __init__(self):
+ def __init__(self, shares):
self.tolerance_percent = 12
- self.expected_total_gbps = 1.0
- self.total_min_expected = self.min_expected(self.expected_total_gbps)
- self.total_max_expected = self.max_expected(self.expected_total_gbps)
- self.tc_expected_percent = {
- 3: 20.0,
- 4: 80.0,
- }
+ self.expected_total = sum(shares.values())
+ self.bounds = {}
+
+ for name, exp in shares.items():
+ self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
def min_expected(self, value):
"""Calculates the minimum acceptable value based on tolerance."""
- return value - (value * self.tolerance_percent / 100)
+ return value - (self.expected_total * self.tolerance_percent / 100)
def max_expected(self, value):
"""Calculates the maximum acceptable value based on tolerance."""
- return value + (value * self.tolerance_percent / 100)
-
- def bound(self, expected, value):
- """Returns True if value is within expected tolerance."""
- return self.min_expected(expected) <= value <= self.max_expected(expected)
+ return value + (self.expected_total * self.tolerance_percent / 100)
- def tc_bandwidth_bound(self, value, tc_ix):
+ def bound(self, values):
"""
- Returns True if the given bandwidth value is within tolerance
- for the TC's expected bandwidth.
+ Return True if all given values fall within tolerance.
"""
- expected = self.tc_expected_percent[tc_ix]
- return self.bound(expected, value)
+ for name, value in values.items():
+ low, high = self.bounds[name]
+ if not low <= value <= high:
+ return False
+ return True
def setup_vf(cfg, set_tc_mapping=True):
@@ -116,8 +113,8 @@ def setup_vf(cfg, set_tc_mapping=True):
except Exception as exc:
raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
try:
- cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
- defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
except Exception as exc:
raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
@@ -139,8 +136,8 @@ def setup_vlans_on_vf(vf_ifc):
Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
"""
vlan_configs = [
- {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"},
- {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"},
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
]
for config in vlan_configs:
@@ -224,13 +221,13 @@ def setup_devlink_rate(cfg):
raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
-def setup_remote_server(cfg):
+def setup_remote_vlans(cfg):
"""
- Sets up VLAN interfaces and starts iperf3 servers on the remote side.
+ Sets up VLAN interfaces on the remote side.
"""
remote_dev = cfg.remote_ifname
vlan_ids = [101, 102]
- remote_ips = ["198.51.100.1", "198.51.100.9"]
+ remote_ips = ["198.51.100.2", "198.51.100.10"]
for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
vlan_dev = f"{remote_dev}.{vlan_id}"
@@ -238,14 +235,13 @@ def setup_remote_server(cfg):
f"type vlan id {vlan_id}", host=cfg.remote)
cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
- cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote)
defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
def setup_test_environment(cfg, set_tc_mapping=True):
"""
Sets up the complete test environment including VF creation, VLANs,
- bridge configuration, devlink rate setup, and the remote server.
+ bridge configuration and devlink rate setup.
"""
vf_ifc = setup_vf(cfg, set_tc_mapping)
ksft_pr(f"Created VF interface: {vf_ifc}")
@@ -256,51 +252,39 @@ def setup_test_environment(cfg, set_tc_mapping=True):
setup_bridge(cfg)
setup_devlink_rate(cfg)
- setup_remote_server(cfg)
- time.sleep(2)
+ setup_remote_vlans(cfg)
-def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1):
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
"""
- Runs a single iperf3 client instance, binding to the given local IP.
- Waits on a barrier to synchronize with other threads.
+ Synchronizes with peers and runs an iperf3-based bandwidth measurement
+ between the given endpoints. Returns average Gbps.
"""
+ runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
try:
barrier.wait(timeout=10)
except Exception as exc:
raise KsftFailEx("iperf3 barrier wait timed") from exc
- iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"]
- result = subprocess.run(iperf_cmd, capture_output=True, text=True,
- check=True)
-
try:
- output = json.loads(result.stdout)
- bits_per_second = output["end"]["sum_received"]["bits_per_second"]
- gbps = bits_per_second / 1e9
- if gbps < min_expected_gbps:
- ksft_pr(
- f"iperf3 bandwidth too low: {gbps:.2f} Gbps "
- f"(expected ≥ {min_expected_gbps} Gbps)"
- )
- return None
- return gbps
- except json.JSONDecodeError as exc:
- ksft_pr(f"Failed to parse iperf3 JSON output: {exc}")
- return None
+ bw_gbps = runner.measure_bandwidth(reverse=True)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+ return bw_gbps
-def run_bandwidth_test():
+def run_bandwidth_test(cfg):
"""
- Launches iperf3 client threads for each VLAN/TC pair and collects results.
+ Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
"""
- def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix):
- results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier)
+ def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+ results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
vf_vlan_data = [
# (local_ip, remote_ip, TC)
- ("198.51.100.2", "198.51.100.1", 3),
- ("198.51.100.10", "198.51.100.9", 4),
+ ("198.51.100.1", "198.51.100.2", 3),
+ ("198.51.100.9", "198.51.100.10", 4),
]
results = {}
@@ -309,8 +293,8 @@ def run_bandwidth_test():
for local_ip, remote_ip, tc_ix in vf_vlan_data:
thread = threading.Thread(
- target=_run_iperf_client_thread,
- args=(remote_ip, local_ip, results, start_barrier, tc_ix)
+ target=_run_measure_bandwidth_thread,
+ args=(local_ip, remote_ip, results, start_barrier, tc_ix)
)
thread.start()
threads.append(thread)
@@ -320,10 +304,11 @@ def run_bandwidth_test():
for tc_ix, tc_bw in results.items():
if tc_bw is None:
- raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth")
+ raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
return results
+
def calculate_bandwidth_percentages(results):
"""
Calculates the percentage of total bandwidth received by TC3 and TC4.
@@ -364,59 +349,48 @@ def verify_total_bandwidth(bw_data, validator):
"""
total = bw_data['total_bw']
- if validator.bound(validator.expected_total_gbps, total):
+ if validator.bound({"total": total}):
return
- if total < validator.total_min_expected:
+ low, high = validator.bounds["total"]
+
+ if total < low:
raise KsftSkipEx(
f"Total bandwidth {total:.2f} Gbps < minimum "
- f"{validator.total_min_expected:.2f} Gbps; "
- f"parent tx_max ({validator.expected_total_gbps:.1f} G) "
+ f"{low:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total:.1f} G) "
f"not reached, cannot validate share"
)
raise KsftFailEx(
f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
- f"{validator.total_max_expected:.2f} Gbps "
- f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)"
+ f"{high:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total:.1f} G)"
)
-def check_bandwidth_distribution(bw_data, validator):
- """
- Checks whether the measured TC3 and TC4 bandwidth percentages
- fall within their expected tolerance ranges.
-
- Returns:
- bool: True if both TC3 and TC4 percentages are within bounds.
- """
- tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3)
- tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4)
-
- return tc3_valid and tc4_valid
-
-
def run_bandwidth_distribution_test(cfg, set_tc_mapping):
"""
- Runs parallel iperf3 tests for both TCs and collects results.
+ Runs parallel bandwidth measurements for both TCs and collects results.
"""
setup_test_environment(cfg, set_tc_mapping)
- bandwidths = run_bandwidth_test()
+ bandwidths = run_bandwidth_test(cfg)
bw_data = calculate_bandwidth_percentages(bandwidths)
test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
print_bandwidth_results(bw_data, test_name)
- verify_total_bandwidth(bw_data, cfg.bw_validator)
+ verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
- return check_bandwidth_distribution(bw_data, cfg.bw_validator)
+ return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+ "tc4": bw_data['tc4_percentage']})
def test_no_tc_mapping_bandwidth(cfg):
"""
- Verifies that bandwidth is not split 80/20 without traffic class mapping.
+ Verifies that bandwidth is not split 20/80 without traffic class mapping.
"""
- pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping"
- fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping"
+ pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
@@ -430,13 +404,13 @@ def test_no_tc_mapping_bandwidth(cfg):
def test_tc_mapping_bandwidth(cfg):
"""
- Verifies that bandwidth is correctly split 80/20 between TC3 and TC4
+ Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
when traffic class mapping is set.
"""
if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
- ksft_pr("Bandwidth is distributed as 80/20 with TC mapping")
+ ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
else:
- raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping")
+ raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
def main() -> None:
@@ -451,9 +425,9 @@ def main() -> None:
)
if not cfg.pci:
raise KsftSkipEx("Could not get PCI address of the interface")
- cfg.require_cmd("iperf3", local=True, remote=True)
- cfg.bw_validator = BandwidthValidator()
+ cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+ cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index fb010a48a5a1..766bfc4ad842 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -25,10 +25,10 @@ try:
fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
- ksft_setup
+ ksft_setup, ksft_variants, KsftNamedVariant
from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
- from drivers.net.lib.py import GenerateTraffic, Remote
+ from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
__all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
@@ -40,11 +40,12 @@ try:
"wait_port_listen", "wait_file",
"KsftSkipEx", "KsftFailEx", "KsftXfailEx",
"ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
- "ksft_setup",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
"ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
"ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
"ksft_not_none", "ksft_not_none",
- "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"]
+ "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
print("Failed importing `net` library from kernel sources")
print(str(e))
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index 9ba03164d73a..a4d04438c313 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -52,7 +52,11 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "../../../kselftest.h"
+#include "../../../net/lib/ksft.h"
#define TOEPLITZ_KEY_MIN_LEN 40
#define TOEPLITZ_KEY_MAX_LEN 60
@@ -64,6 +68,7 @@
#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR (1 << 16)
#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
@@ -101,6 +106,8 @@ struct ring_state {
static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
static int rps_silo_to_cpu[RPS_MAX_CPUS];
static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
static struct ring_state rings[RSS_MAX_CPUS];
static inline uint32_t toeplitz(const unsigned char *four_tuple,
@@ -129,7 +136,12 @@ static inline uint32_t toeplitz(const unsigned char *four_tuple,
/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
static void verify_rss(uint32_t rx_hash, int cpu)
{
- int queue = rx_hash % cfg_num_queues;
+ int queue;
+
+ if (rss_indir_tbl_size)
+ queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+ else
+ queue = rx_hash % cfg_num_queues;
log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
if (rx_irq_cpus[queue] != cpu) {
@@ -482,6 +494,56 @@ static void parse_rps_bitmap(const char *arg)
rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
}
+static void read_rss_dev_info_ynl(void)
+{
+ struct ethtool_rss_get_req *req;
+ struct ethtool_rss_get_rsp *rsp;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+ if (!ys)
+ error(1, errno, "ynl_sock_create failed");
+
+ req = ethtool_rss_get_req_alloc();
+ if (!req)
+ error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+ ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+ rsp = ethtool_rss_get(ys, req);
+ if (!rsp)
+ error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+ if (!rsp->_len.hkey)
+ error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+ if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+ rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+ error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+ rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+ TOEPLITZ_KEY_MAX_LEN);
+
+ memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+ if (rsp->_count.indir > RSS_MAX_INDIR)
+ error(1, 0, "RSS indirection table too large (%u > %u)",
+ rsp->_count.indir, RSS_MAX_INDIR);
+
+ /* If indir table not available we'll fallback to simple modulo math */
+ if (rsp->_count.indir) {
+ memcpy(rss_indir_tbl, rsp->indir,
+ rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+ rss_indir_tbl_size = rsp->_count.indir;
+
+ log_verbose("RSS indirection table size: %u\n",
+ rss_indir_tbl_size);
+ }
+
+ ethtool_rss_get_rsp_free(rsp);
+ ethtool_rss_get_req_free(req);
+ ynl_sock_destroy(ys);
+}
+
static void parse_opts(int argc, char **argv)
{
static struct option long_options[] = {
@@ -550,7 +612,7 @@ static void parse_opts(int argc, char **argv)
}
if (!have_toeplitz)
- error(1, 0, "Must supply rss key ('-k')");
+ read_rss_dev_info_ynl();
num_cpus = get_nprocs();
if (num_cpus > RSS_MAX_CPUS)
@@ -576,6 +638,10 @@ int main(int argc, char **argv)
fd_sink = setup_sink();
setup_rings();
+
+ /* Signal to test framework that we're ready to receive */
+ ksft_ready();
+
process_rings();
cleanup_rings();
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+ """Verify that RPS is not already configured."""
+
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if set(val) - {"0", ","}:
+ raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+ rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+ with open(rfs_file, "r", encoding="utf-8") as fp:
+ val = fp.read().strip()
+ if val != "0":
+ raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+ with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+ encoding="utf-8") as fp:
+ data = fp.read().strip()
+ if "," in data or "-" in data:
+ raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+ return int(data)
+
+
+def _get_irq_cpus(cfg):
+ """
+ Read the list of IRQs for the device Rx queues.
+ """
+ queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+ napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+ # Remap into ID-based dicts
+ napis = {n["id"]: n for n in napis}
+ queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+ cpus = []
+ for rx in range(9999):
+ name = f"rx{rx}"
+ if name not in queues:
+ break
+ cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+ return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+ """
+ Get CPUs that are not used by Rx queues.
+ Returns a list of at least 'count' CPU numbers.
+ """
+
+ # Get CPUs used by Rx queues
+ rx_cpus = set(_get_irq_cpus(cfg))
+
+ # Get total number of CPUs
+ num_cpus = os.cpu_count()
+
+ # Find unused CPUs
+ unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+ if len(unused_cpus) < count:
+ raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+ return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+ """Configure RPS for all Rx queues."""
+
+ mask = 0
+ for cpu in rps_cpus:
+ mask |= (1 << cpu)
+ mask = hex(mask)[2:]
+
+ # Set RPS bitmap for all rx queues
+ for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+ with open(rps_file, "w", encoding="utf-8") as fp:
+ fp.write(mask)
+
+ return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+ """Send 20 packets of requested type."""
+
+ # Determine protocol and IP version for socat
+ if proto_flag == "-u":
+ proto = "UDP"
+ else:
+ proto = "TCP"
+
+ baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+ # Run socat in a loop to send traffic periodically
+ # Use sh -c with a loop similar to toeplitz_client.sh
+ socat_cmd = f"""
+ for i in `seq 20`; do
+ echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+ sleep 0.001;
+ done
+ """
+
+ cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+ for grp in ["", "rss", "rps"]:
+ for l4 in ["tcp", "udp"]:
+ for l3 in ["4", "6"]:
+ name = f"{l4}_ipv{l3}"
+ if grp:
+ name = f"{grp}_{name}"
+ yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+ """Run a single toeplitz test."""
+
+ cfg.require_ipver(ipver)
+
+ # Check that rxhash is enabled
+ ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+ if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hfunc": ETH_RSS_HASH_TOP,
+ "input-xfrm": {}})
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+ "hfunc": rss.get('hfunc'),
+ "input-xfrm": rss.get('input-xfrm', {})
+ })
+
+ port = rand_port(socket.SOCK_DGRAM)
+
+ toeplitz_path = cfg.test_dir / "toeplitz"
+ rx_cmd = [
+ str(toeplitz_path),
+ "-" + ipver,
+ proto_flag,
+ "-d", str(port),
+ "-i", cfg.ifname,
+ "-T", "4000",
+ "-s",
+ "-v"
+ ]
+
+ if grp:
+ _check_rps_and_rfs_not_configured(cfg)
+ if grp == "rss":
+ irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+ rx_cmd += ["-C", irq_cpus]
+ ksft_pr(f"RSS using CPUs: {irq_cpus}")
+ elif grp == "rps":
+ # Get CPUs not used by Rx queues and configure them for RPS
+ rps_cpus = _get_unused_cpus(cfg, count=2)
+ rps_mask = _configure_rps(cfg, rps_cpus)
+ defer(_configure_rps, cfg, [])
+ rx_cmd += ["-r", rps_mask]
+ ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+ # Run rx in background, it will exit once it has seen enough packets
+ with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+ while rx_proc.proc.poll() is None:
+ _send_traffic(cfg, proto_flag, ipver, port)
+
+ # Check rx result
+ ksft_pr("Receiver output:")
+ ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+ if rx_proc.stderr:
+ ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+ """Ksft boilerplate main."""
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netnl = NetdevFamily()
+ ksft_run(cases=[test], args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index b0c6300150fb..8b75faa9af6d 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -25,7 +25,7 @@ try:
fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
- ksft_setup
+ ksft_setup, ksft_variants, KsftNamedVariant
from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
@@ -38,16 +38,17 @@ try:
"wait_port_listen", "wait_file",
"KsftSkipEx", "KsftFailEx", "KsftXfailEx",
"ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
- "ksft_setup",
+ "ksft_setup", "ksft_variants", "KsftNamedVariant",
"ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
"ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
"ksft_not_none", "ksft_not_none"]
from .env import NetDrvEnv, NetDrvEpEnv
- from .load import GenerateTraffic
+ from .load import GenerateTraffic, Iperf3Runner
from .remote import Remote
- __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"]
+ __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+ "Iperf3Runner"]
except ModuleNotFoundError as e:
print("Failed importing `net` library from kernel sources")
print(str(e))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 01be3d9b9720..8b644fd84ff2 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -168,6 +168,8 @@ class NetDrvEpEnv(NetDrvEnvBase):
# resolve remote interface name
self.remote_ifname = self.resolve_remote_ifc()
+ self.remote_dev = ip("-d link show dev " + self.remote_ifname,
+ host=self.remote, json=True)[0]
self._required_cmd = {}
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index c4e808407cc4..f181fa2d38fc 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,21 +2,89 @@
import re
import time
+import json
from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
-class GenerateTraffic:
- def __init__(self, env, port=None):
- env.require_cmd("iperf3", local=True, remote=True)
+class Iperf3Runner:
+ """
+ Sets up and runs iperf3 traffic.
+ """
+ def __init__(self, env, port=None, server_ip=None, client_ip=None):
+ env.require_cmd("iperf3", local=True, remote=True)
self.env = env
-
self.port = rand_port() if port is None else port
- self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True)
+ self.server_ip = server_ip
+ self.client_ip = client_ip
+
+ def _build_server(self):
+ cmdline = f"iperf3 -s -1 -p {self.port}"
+ if self.server_ip:
+ cmdline += f" -B {self.server_ip}"
+ return cmdline
+
+ def _build_client(self, streams, duration, reverse):
+ host = self.env.addr if self.server_ip is None else self.server_ip
+ cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J"
+ if self.client_ip:
+ cmdline += f" -B {self.client_ip}"
+ if reverse:
+ cmdline += " --reverse"
+ return cmdline
+
+ def start_server(self):
+ """
+ Starts an iperf3 server with optional bind IP.
+ """
+ cmdline = self._build_server()
+ proc = cmd(cmdline, background=True)
wait_port_listen(self.port)
time.sleep(0.1)
- self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400",
- background=True, host=env.remote)
+ return proc
+
+ def start_client(self, background=False, streams=1, duration=10, reverse=False):
+ """
+ Starts the iperf3 client with the configured options.
+ """
+ cmdline = self._build_client(streams, duration, reverse)
+ return cmd(cmdline, background=background, host=self.env.remote)
+
+ def measure_bandwidth(self, reverse=False):
+ """
+ Runs an iperf3 measurement and returns the average bandwidth (Gbps).
+ Discards the first and last few reporting intervals and uses only the
+ middle part of the run where throughput is typically stable.
+ """
+ self.start_server()
+ result = self.start_client(duration=10, reverse=reverse)
+
+ if result.ret != 0:
+ raise RuntimeError("iperf3 failed to run successfully")
+ try:
+ out = json.loads(result.stdout)
+ except json.JSONDecodeError as exc:
+ raise ValueError("Failed to parse iperf3 JSON output") from exc
+
+ intervals = out.get("intervals", [])
+ samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals]
+ if len(samples) < 10:
+ raise ValueError(f"iperf3 returned too few intervals: {len(samples)}")
+ # Discard potentially unstable first and last 3 seconds.
+ stable = samples[3:-3]
+
+ avg = sum(stable) / len(stable)
+
+ return avg
+
+
+class GenerateTraffic:
+ def __init__(self, env, port=None):
+ self.env = env
+ self.runner = Iperf3Runner(env, port)
+
+ self._iperf_server = self.runner.start_server()
+ self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400)
# Wait for traffic to ramp up
if not self._wait_pkts(pps=1000):
@@ -61,7 +129,7 @@ class GenerateTraffic:
def _wait_client_stopped(self, sleep=0.005, timeout=5):
end = time.monotonic() + timeout
- live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ")
+ live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ")
while time.monotonic() < end:
data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 87f89fd92f8c..ae8abff4be40 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -249,7 +249,7 @@ function listen_port_and_save_to() {
# Just wait for 2 seconds
timeout 2 ip netns exec "${NAMESPACE}" \
- socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
}
# Only validate that the message arrived properly
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index a3446b569976..2022f3061738 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -28,8 +28,6 @@ OUTPUT_FILE="/tmp/${TARGET}"
# Check for basic system dependency and exit if not found
check_for_dependencies
-# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
-echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
@@ -39,6 +37,9 @@ do
for IP_VERSION in "ipv6" "ipv4"
do
echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Set current loglevel to KERN_INFO(6), and default to
+ # KERN_NOTICE(5)
+ echo "6 5" > /proc/sys/kernel/printk
# Create one namespace and two interfaces
set_network "${IP_VERSION}"
# Create a dynamic target for netconsole
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
index 29bad56448a2..06089643b771 100755
--- a/tools/testing/selftests/drivers/net/netcons_overflow.sh
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -15,7 +15,7 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
-MAX_USERDATA_ITEMS=16
+MAX_USERDATA_ITEMS=256
# Function to create userdata entries
function create_userdata_max_entries() {
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index df10c7243511..1a228c5430f5 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -8,7 +8,6 @@ TEST_PROGS := \
ethtool-features.sh \
ethtool-fec.sh \
ethtool-pause.sh \
- ethtool-ring.sh \
fib.sh \
fib_notifications.sh \
hw_stats_l3.sh \
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 030762b203d7..1b529ccaf050 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -3,7 +3,8 @@
lib_dir=$(dirname $0)/../../../net/forwarding
-ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+ALL_TESTS="fw_flash_test params_test \
+ params_default_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
@@ -78,17 +79,28 @@ fw_flash_test()
param_get()
{
local name=$1
+ local attr=${2:-value}
+ local cmode=${3:-driverinit}
cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
- '.[][][].values[] | select(.cmode == "driverinit").value'
+ '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr"
}
param_set()
{
local name=$1
local value=$2
+ local cmode=${3:-driverinit}
- devlink dev param set $DL_HANDLE name $name cmode driverinit value $value
+ devlink dev param set $DL_HANDLE name $name cmode $cmode value $value
+}
+
+param_set_default()
+{
+ local name=$1
+ local cmode=${2:-driverinit}
+
+ devlink dev param set $DL_HANDLE name $name default cmode $cmode
}
check_value()
@@ -97,12 +109,18 @@ check_value()
local phase_name=$2
local expected_param_value=$3
local expected_debugfs_value=$4
+ local cmode=${5:-driverinit}
local value
+ local attr="value"
- value=$(param_get $name)
- check_err $? "Failed to get $name param value"
+ if [[ "$phase_name" == *"default"* ]]; then
+ attr="default"
+ fi
+
+ value=$(param_get $name $attr $cmode)
+ check_err $? "Failed to get $name param $attr"
[ "$value" == "$expected_param_value" ]
- check_err $? "Unexpected $phase_name $name param value"
+ check_err $? "Unexpected $phase_name $name param $attr"
value=$(<$DEBUGFS_DIR/$name)
check_err $? "Failed to get $name debugfs value"
[ "$value" == "$expected_debugfs_value" ]
@@ -135,6 +153,92 @@ params_test()
log_test "params test"
}
+value_to_debugfs()
+{
+ local value=$1
+
+ case "$value" in
+ true)
+ echo "Y"
+ ;;
+ false)
+ echo "N"
+ ;;
+ *)
+ echo "$value"
+ ;;
+ esac
+}
+
+test_default()
+{
+ local param_name=$1
+ local new_value=$2
+ local expected_default=$3
+ local cmode=${4:-driverinit}
+ local default_debugfs
+ local new_debugfs
+ local expected_debugfs
+
+ default_debugfs=$(value_to_debugfs $expected_default)
+ new_debugfs=$(value_to_debugfs $new_value)
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name initial-default $expected_default $expected_debugfs $cmode
+
+ param_set $param_name $new_value $cmode
+ check_err $? "Failed to set $param_name to $new_value"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs")
+ check_value $param_name post-set $new_value $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$new_debugfs
+ check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode
+
+ param_set_default $param_name $cmode
+ check_err $? "Failed to set $param_name to default"
+
+ expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs")
+ check_value $param_name post-set-default $expected_default $expected_debugfs $cmode
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device"
+
+ expected_debugfs=$default_debugfs
+ check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode
+}
+
+params_default_test()
+{
+ RET=0
+
+ if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then
+ echo "SKIP: devlink cli missing default feature"
+ return
+ fi
+
+ # Remove side effects of previous tests. Use plain param_set, because
+ # param_set_default is a feature under test here.
+ param_set max_macs 32 driverinit
+ check_err $? "Failed to reset max_macs to default value"
+ param_set test1 true driverinit
+ check_err $? "Failed to reset test1 to default value"
+ param_set test2 1234 runtime
+ check_err $? "Failed to reset test2 to default value"
+
+ devlink dev reload $DL_HANDLE
+ check_err $? "Failed to reload device for clean state"
+
+ test_default max_macs 16 32 driverinit
+ test_default test1 false true driverinit
+ test_default test2 100 1234 runtime
+
+ log_test "params default test"
+}
+
check_region_size()
{
local name=$1
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
deleted file mode 100755
index c969559ffa7a..000000000000
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-
-source ethtool-common.sh
-
-function get_value {
- local query="${SETTINGS_MAP[$1]}"
-
- echo $(ethtool -g $NSIM_NETDEV | \
- tail -n +$CURR_SETT_LINE | \
- awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
-}
-
-function update_current_settings {
- for key in ${!SETTINGS_MAP[@]}; do
- CURRENT_SETTINGS[$key]=$(get_value $key)
- done
- echo ${CURRENT_SETTINGS[@]}
-}
-
-if ! ethtool -h | grep -q set-ring >/dev/null; then
- echo "SKIP: No --set-ring support in ethtool"
- exit 4
-fi
-
-NSIM_NETDEV=$(make_netdev)
-
-set -o pipefail
-
-declare -A SETTINGS_MAP=(
- ["rx"]="RX"
- ["rx-mini"]="RX Mini"
- ["rx-jumbo"]="RX Jumbo"
- ["tx"]="TX"
-)
-
-declare -A EXPECTED_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-declare -A CURRENT_SETTINGS=(
- ["rx"]=""
- ["rx-mini"]=""
- ["rx-jumbo"]=""
- ["tx"]=""
-)
-
-MAX_VALUE=$((RANDOM % $((2**32-1))))
-RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
-
-for ring_max_entry in $RING_MAX_LIST; do
- echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
-done
-
-CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
-
-# populate the expected settings map
-for key in ${!SETTINGS_MAP[@]}; do
- EXPECTED_SETTINGS[$key]=$(get_value $key)
-done
-
-# test
-for key in ${!SETTINGS_MAP[@]}; do
- value=$((RANDOM % $MAX_VALUE))
-
- ethtool -G $NSIM_NETDEV "$key" "$value"
-
- EXPECTED_SETTINGS[$key]="$value"
- expected=${EXPECTED_SETTINGS[@]}
- current=$(update_current_settings)
-
- check $? "$current" "$expected"
- set +x
-done
-
-if [ $num_errors -eq 0 ]; then
- echo "PASSED all $((num_passes)) checks"
- exit 0
-else
- echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
- exit 1
-fi
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
index 4ae7a785ff10..06559ef49b9a 100755
--- a/tools/testing/selftests/drivers/net/psp.py
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -109,6 +109,10 @@ def _check_data_outq(s, exp_len, force_wait=False):
time.sleep(0.01)
ksft_eq(outq, exp_len)
+
+def _get_stat(cfg, key):
+ return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key]
+
#
# Test case boiler plate
#
@@ -171,11 +175,16 @@ def dev_rotate(cfg):
""" Test key rotation """
_init_psp_dev(cfg)
+ prev_rotations = _get_stat(cfg, 'key-rotations')
+
rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
ksft_eq(rot['id'], cfg.psp_dev_id)
rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
ksft_eq(rot['id'], cfg.psp_dev_id)
+ cur_rotations = _get_stat(cfg, 'key-rotations')
+ ksft_eq(cur_rotations, prev_rotations + 2)
+
def dev_rotate_spi(cfg):
""" Test key rotation and SPI check """
@@ -475,6 +484,7 @@ def data_stale_key(cfg):
""" Test send on a double-rotated key """
_init_psp_dev(cfg)
+ prev_stale = _get_stat(cfg, 'stale-events')
s = _make_psp_conn(cfg)
try:
rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
@@ -495,6 +505,9 @@ def data_stale_key(cfg):
cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
+ cur_stale = _get_stat(cfg, 'stale-events')
+ ksft_gt(cur_stale, prev_stale)
+
s.send(b'0123456789' * 200)
_check_data_outq(s, 2000, force_wait=True)
finally:
diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py
new file mode 100755
index 000000000000..f9530a8b0856
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ring_reconfig.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test channel and ring size configuration via ethtool (-L / -G).
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic
+from lib.py import defer, NlError
+
+
+def channels(cfg) -> None:
+ """
+ Twiddle channel counts in various combinations of parameters.
+ We're only looking for driver adhering to the requested config
+ if the config is accepted and crashes.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx", "tx", "combined"]
+ mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"},
+ {"rx", "tx", "combined"},]
+
+ # Get the set of keys that device actually supports
+ restore = {}
+ supported = set()
+ for key in all_keys:
+ if key + "-max" in chans:
+ supported.add(key)
+ restore |= {key + "-count": chans[key + "-count"]}
+
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+ def test_config(config):
+ try:
+ cfg.eth.channels_set(ehdr | config)
+ get = cfg.eth.channels_get(ehdr)
+ for k, v in config.items():
+ ksft_eq(get.get(k, 0), v)
+ except NlError as e:
+ failed.append(mix)
+ ksft_pr("Can't set", config, e)
+ else:
+ ksft_pr("Okay", config)
+
+ failed = []
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+
+ # Set all the values in the mix to 1, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = 1 if key in mix else 0
+ test_config(config)
+
+ for mix in mixes:
+ if not mix.issubset(supported):
+ continue
+ if mix in failed:
+ continue
+
+ # Set all the values in the mix to max, other supported to 0
+ config = {}
+ for key in all_keys:
+ config[key + "-count"] = chans[key + '-max'] if key in mix else 0
+ test_config(config)
+
+
+def _configure_min_ring_cnt(cfg) -> None:
+ """ Try to configure a single Rx/Tx ring. """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ chans = cfg.eth.channels_get(ehdr)
+
+ all_keys = ["rx-count", "tx-count", "combined-count"]
+ restore = {}
+ config = {}
+ for key in all_keys:
+ if key in chans:
+ restore[key] = chans[key]
+ config[key] = 0
+
+ if chans.get('combined-count', 0) > 1:
+ config['combined-count'] = 1
+ elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1:
+ config['tx-count'] = 1
+ config['rx-count'] = 1
+ else:
+ # looks like we're already on 1 channel
+ return
+
+ cfg.eth.channels_set(ehdr | config)
+ defer(cfg.eth.channels_set, ehdr | restore)
+
+
+def ringparam(cfg) -> None:
+ """
+ Tweak the ringparam configuration. Try to run some traffic over min
+ ring size to make sure it actually functions.
+ """
+ ehdr = {'header':{'dev-index': cfg.ifindex}}
+ rings = cfg.eth.rings_get(ehdr)
+
+ restore = {}
+ maxes = {}
+ params = set()
+ for key in rings.keys():
+ if 'max' in key:
+ param = key[:-4]
+ maxes[param] = rings[key]
+ params.add(param)
+ restore[param] = rings[param]
+
+ defer(cfg.eth.rings_set, ehdr | restore)
+
+ # Speed up the reconfig by configuring just one ring
+ _configure_min_ring_cnt(cfg)
+
+ # Try to reach min on all settings
+ for param in params:
+ val = rings[param]
+ while True:
+ try:
+ cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex},
+ param: val // 2})
+ if val == 0:
+ break
+ val //= 2
+ except NlError:
+ break
+
+ get = cfg.eth.rings_get(ehdr)
+ ksft_eq(get[param], val)
+
+ ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})")
+
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+ # Try max across all params, if the driver supports large rings
+ # this may OOM so we ignore errors
+ try:
+ ksft_pr("Applying max settings")
+ config = {p: maxes[p] for p in params}
+ cfg.eth.rings_set(ehdr | config)
+ except NlError as e:
+ ksft_pr("Can't set max params", config, e)
+ else:
+ GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.eth = EthtoolFamily()
+
+ ksft_run([channels,
+ ringparam],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 04d0a2a13e73..b08e4d48b15c 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -263,14 +263,15 @@ def procfs_downup_hammer(cfg) -> None:
Reading stats via procfs only holds the RCU lock, drivers often try
to sleep when reading the stats, or don't protect against races.
"""
- # Max out the queues, we'll flip between max and 1
+ # Set a large number of queues,
+ # we'll flip between min(max_queues, 64) and 1
channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
if channels['combined-count'] == 0:
rx_type = 'rx'
else:
rx_type = 'combined'
cur_queue_cnt = channels[f'{rx_type}-count']
- max_queue_cnt = channels[f'{rx_type}-max']
+ max_queue_cnt = min(channels[f'{rx_type}-max'], 64)
cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index a148004e1c36..e54df158dfe9 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -12,6 +12,7 @@ from dataclasses import dataclass
from enum import Enum
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftNamedVariant, ksft_variants
from lib.py import KsftFailEx, NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import bkg, cmd, rand_port, wait_port_listen
@@ -672,7 +673,18 @@ def test_xdp_native_adjst_head_shrnk_data(cfg):
_validate_res(res, offset_lst, pkt_sz_lst)
-def _test_xdp_native_ifc_stats(cfg, act):
+@ksft_variants([
+ KsftNamedVariant("pass", XDPAction.PASS),
+ KsftNamedVariant("drop", XDPAction.DROP),
+ KsftNamedVariant("tx", XDPAction.TX),
+])
+def test_xdp_native_qstats(cfg, act):
+ """
+ Send 1000 messages. Expect XDP action specified in @act.
+ Make sure the packets were counted to interface level qstats
+ (Rx, and Tx if act is TX).
+ """
+
cfg.require_cmd("socat")
bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
@@ -687,9 +699,12 @@ def _test_xdp_native_ifc_stats(cfg, act):
"/dev/null"
# Listener runs on "remote" in case of XDP_TX
rx_host = cfg.remote if act == XDPAction.TX else None
- # We want to spew 2000 packets quickly, bash seems to do a good enough job
- tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
- "for i in `seq 2000`; do echo a >&5; done; exec 5>&-"
+ # We want to spew 1000 packets quickly, bash seems to do a good enough job
+ # Each reopening of the socket gives us a differenot local port (for RSS)
+ tx_udp = "for _ in `seq 20`; do " \
+ f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+ "for i in `seq 50`; do echo a >&5; done; " \
+ "exec 5>&-; done"
cfg.wait_hw_stats_settle()
# Qstats have more clearly defined semantics than rtnetlink.
@@ -704,11 +719,11 @@ def _test_xdp_native_ifc_stats(cfg, act):
cfg.wait_hw_stats_settle()
after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- ksft_ge(after['rx-packets'] - before['rx-packets'], 2000)
+ expected_pkts = 1000
+ ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts)
if act == XDPAction.TX:
- ksft_ge(after['tx-packets'] - before['tx-packets'], 2000)
+ ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts)
- expected_pkts = 2000
stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
if act == XDPAction.TX:
@@ -730,30 +745,6 @@ def _test_xdp_native_ifc_stats(cfg, act):
ksft_ge(after['tx-packets'], before['tx-packets'])
-def test_xdp_native_qstats_pass(cfg):
- """
- Send 2000 messages, expect XDP_PASS, make sure the packets were counted
- to interface level qstats (Rx).
- """
- _test_xdp_native_ifc_stats(cfg, XDPAction.PASS)
-
-
-def test_xdp_native_qstats_drop(cfg):
- """
- Send 2000 messages, expect XDP_DROP, make sure the packets were counted
- to interface level qstats (Rx).
- """
- _test_xdp_native_ifc_stats(cfg, XDPAction.DROP)
-
-
-def test_xdp_native_qstats_tx(cfg):
- """
- Send 2000 messages, expect XDP_TX, make sure the packets were counted
- to interface level qstats (Rx and Tx)
- """
- _test_xdp_native_ifc_stats(cfg, XDPAction.TX)
-
-
def main():
"""
Main function to execute the XDP tests.
@@ -778,9 +769,7 @@ def main():
test_xdp_native_adjst_tail_shrnk_data,
test_xdp_native_adjst_head_grow_data,
test_xdp_native_adjst_head_shrnk_data,
- test_xdp_native_qstats_pass,
- test_xdp_native_qstats_drop,
- test_xdp_native_qstats_tx,
+ test_xdp_native_qstats,
],
args=(cfg,))
ksft_exit()
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 8f9850a71f54..6930fe926c58 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -4,10 +4,8 @@ bind_timewait
bind_wildcard
busy_poller
cmsg_sender
-diag_uid
epoll_busy_poll
fin_ack_lat
-gro
hwtstamp_config
io_uring_zerocopy_tx
ioam6_parser
@@ -18,7 +16,6 @@ ipv6_flowlabel
ipv6_flowlabel_mgr
ipv6_fragmentation
log.txt
-msg_oob
msg_zerocopy
netlink-dumps
nettest
@@ -35,9 +32,6 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
-scm_inq
-scm_pidfd
-scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
sk_so_peek_off
@@ -45,7 +39,6 @@ skf_net_off
socket
so_incoming_cpu
so_netns_cookie
-so_peek_off
so_txtime
so_rcv_listener
stress_reuseport_listen
@@ -57,7 +50,6 @@ tcp_port_share
tfo
timestamping
tls
-toeplitz
tools
tun
txring_overwrite
@@ -65,4 +57,3 @@ txtimestamp
udpgso
udpgso_bench_rx
udpgso_bench_tx
-unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b5127e968108..b66ba04f19d9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -38,7 +38,6 @@ TEST_PROGS := \
fq_band_pktlimit.sh \
gre_gso.sh \
gre_ipv6_lladdr.sh \
- gro.sh \
icmp.sh \
icmp_redirect.sh \
io_uring_zerocopy_tx.sh \
@@ -121,8 +120,6 @@ TEST_PROGS := \
# end of TEST_PROGS
TEST_PROGS_EXTENDED := \
- toeplitz.sh \
- toeplitz_client.sh \
xfrm_policy_add_speed.sh \
# end of TEST_PROGS_EXTENDED
@@ -130,7 +127,6 @@ TEST_GEN_FILES := \
bind_bhash \
cmsg_sender \
fin_ack_lat \
- gro \
hwtstamp_config \
io_uring_zerocopy_tx \
ioam6_parser \
@@ -159,7 +155,6 @@ TEST_GEN_FILES := \
tcp_mmap \
tfo \
timestamping \
- toeplitz \
txring_overwrite \
txtimestamp \
udpgso \
@@ -193,8 +188,6 @@ TEST_FILES := \
in_netns.sh \
lib.sh \
settings \
- setup_loopback.sh \
- setup_veth.sh \
# end of TEST_FILES
# YNL files, must be before "include ..lib.mk"
diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore
new file mode 100644
index 000000000000..240b26740c9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/.gitignore
@@ -0,0 +1,8 @@
+diag_uid
+msg_oob
+scm_inq
+scm_pidfd
+scm_rights
+so_peek_off
+unix_connect
+unix_connreset
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 528d14c598bb..3cd677b72072 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -8,6 +8,7 @@ TEST_GEN_PROGS := \
scm_rights \
so_peek_off \
unix_connect \
+ unix_connreset \
# end of TEST_GEN_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
index 1a77728128e5..86e7b0fb522d 100644
--- a/tools/testing/selftests/net/af_unix/so_peek_off.c
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -36,8 +36,8 @@ FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
FIXTURE_SETUP(so_peek_off)
{
struct timeval timeout = {
- .tv_sec = 0,
- .tv_usec = 3000,
+ .tv_sec = 5,
+ .tv_usec = 0,
};
int ret;
diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c
new file mode 100644
index 000000000000..08c1de8f5a98
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connreset.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest for AF_UNIX socket close and ECONNRESET behaviour.
+ *
+ * This test verifies:
+ * 1. SOCK_STREAM returns EOF when the peer closes normally.
+ * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data.
+ * 3. SOCK_SEQPACKET returns EOF when the peer closes normally.
+ * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data.
+ * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes.
+ *
+ * These tests document the intended Linux behaviour.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../../kselftest_harness.h"
+
+#define SOCK_PATH "/tmp/af_unix_connreset.sock"
+
+static void remove_socket_file(void)
+{
+ unlink(SOCK_PATH);
+}
+
+FIXTURE(unix_sock)
+{
+ int server;
+ int client;
+ int child;
+};
+
+FIXTURE_VARIANT(unix_sock)
+{
+ int socket_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, stream) {
+ .socket_type = SOCK_STREAM,
+ .name = "SOCK_STREAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, dgram) {
+ .socket_type = SOCK_DGRAM,
+ .name = "SOCK_DGRAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, seqpacket) {
+ .socket_type = SOCK_SEQPACKET,
+ .name = "SOCK_SEQPACKET",
+};
+
+FIXTURE_SETUP(unix_sock)
+{
+ struct sockaddr_un addr = {};
+ int err;
+
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, SOCK_PATH);
+ remove_socket_file();
+
+ self->server = socket(AF_UNIX, variant->socket_type, 0);
+ ASSERT_LT(-1, self->server);
+
+ err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ err = listen(self->server, 1);
+ ASSERT_EQ(0, err);
+ }
+
+ self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0);
+ ASSERT_LT(-1, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(unix_sock)
+{
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET)
+ close(self->child);
+
+ close(self->client);
+ close(self->server);
+ remove_socket_file();
+}
+
+/* Test 1: peer closes normally */
+TEST_F(unix_sock, eof)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ close(self->child);
+ } else {
+ close(self->server);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(0, n);
+ } else {
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 2: peer closes with unread data */
+TEST_F(unix_sock, reset_unread_behavior)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ /* Send data that will remain unread */
+ send(self->client, "hello", 5, 0);
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ /* No real connection, just close the server */
+ close(self->server);
+ } else {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ /* Peer closes before client reads */
+ close(self->child);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+ ASSERT_EQ(-1, n);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(ECONNRESET, errno);
+ } else {
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 3: closing unaccepted (embryo) server socket should reset client. */
+TEST_F(unix_sock, reset_closed_embryo)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ snprintf(_metadata->results->reason,
+ sizeof(_metadata->results->reason),
+ "Test only applies to SOCK_STREAM and SOCK_SEQPACKET");
+ exit(KSFT_XFAIL);
+ }
+
+ /* Close server without accept()ing */
+ close(self->server);
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(ECONNRESET, errno);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 92eb880c52f2..00758f00efbf 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -75,7 +75,7 @@ setup_v4() {
ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
if [ $? -ne 0 ]; then
cleanup_v4
- echo "failed"
+ echo "failed; is the system using MACAddressPolicy=persistent ?"
exit 1
fi
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7d2d40812074..5ec1c85c1623 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100
GRO_FLUSH_TIMEOUT=50000
SUSPEND_TIMEOUT=20000000
+NAPI_THREADED_MODE_BUSY_POLL=2
+
setup_ns()
{
set -e
@@ -62,6 +64,9 @@ cleanup_ns()
test_busypoll()
{
suspend_value=${1:-0}
+ napi_threaded_value=${2:-0}
+ prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL}
+
tmp_file=$(mktemp)
out_file=$(mktemp)
@@ -73,10 +78,11 @@ test_busypoll()
-b${SERVER_IP} \
-m${MAX_EVENTS} \
-u${BUSY_POLL_USECS} \
- -P${PREFER_BUSY_POLL} \
+ -P${prefer_busy_poll_value} \
-g${BUSY_POLL_BUDGET} \
-i${NSIM_SV_IFIDX} \
-s${suspend_value} \
+ -t${napi_threaded_value} \
-o${out_file}&
wait_local_port_listen nssv ${SERVER_PORT} tcp
@@ -109,6 +115,15 @@ test_busypoll_with_suspend()
return $?
}
+test_busypoll_with_napi_threaded()
+{
+ # Only enable napi threaded poll. Set suspend timeout and prefer busy
+ # poll to 0.
+ test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0
+
+ return $?
+}
+
###
### Code start
###
@@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then
exit 1
fi
+test_busypoll_with_napi_threaded
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_napi_threaded failed"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 04c7ff577bb8..3a81f9c94795 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs;
static uint16_t cfg_busy_poll_budget;
static uint8_t cfg_prefer_busy_poll;
-/* IRQ params */
+/* NAPI params */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;
+static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
static void usage(const char *filepath)
{
error(1, 0,
- "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
+ "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
filepath);
}
@@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv)
if (argc <= 1)
usage(argv[0]);
- while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+ while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
/* most options take integer values, except o and b, so reduce
* code duplication a bit for the common case by calling
* strtoull here and leave bounds checking and casting per
@@ -168,6 +169,12 @@ static void parse_opts(int argc, char **argv)
cfg_ifindex = (int)tmp;
break;
+ case 't':
+ if (tmp > 2)
+ error(1, ERANGE, "napi threaded poll value must be 0-2");
+
+ cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
+ break;
}
}
@@ -247,6 +254,9 @@ static void setup_queue(void)
netdev_napi_set_req_set_irq_suspend_timeout(set_req,
cfg_irq_suspend_timeout);
+ if (cfg_napi_threaded_poll)
+ netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
+
if (netdev_napi_set(ys, set_req))
error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a94b73a53f72..a88f797c549a 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
+ fib6_ra_to_static"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1476,6 +1477,68 @@ ipv6_route_metrics_test()
route_cleanup
}
+fib6_ra_to_static()
+{
+ setup
+
+ echo
+ echo "Fib6 route promotion from RA-learned to static test"
+ set -e
+
+ # ra6 is required for the test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. Make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Configure static address on the same prefix
+ $IP -6 address add 2001:12::dead/64 dev veth1 nodad
+
+ # On-link route won't expire anymore, default route still owned by RA
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ # Send a second RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+ sleep 1
+
+ # Expire is not back, on-link route is still static
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ $IP -6 address del 2001:12::dead/64 dev veth1 nodad
+
+ # Expire is back, on-link route is now owned by RA again
+ check_rt_num 2 $($IP -6 route list |grep expires|wc -l)
+
+ log_test $ret 0 "ipv6 promote RA route to static"
+
+ set +e
+
+ cleanup &> /dev/null
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@@ -2798,6 +2861,7 @@ do
ipv6_mpath_list) ipv6_mpath_list_test;;
ipv4_mpath_balance) ipv4_mpath_balance_test;;
ipv6_mpath_balance) ipv6_mpath_balance_test;;
+ fib6_ra_to_static) fib6_ra_to_static;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 8c1597ebc2d3..e86d77946585 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
cfg_test
fwd_test
ctrl_test
+ disable_test
"
NUM_NETIFS=4
@@ -64,7 +65,10 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ local vlan_filtering=$1; shift
+
+ ip link add name br0 type bridge \
+ vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \
mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
bridge vlan add vid 10 dev br0 self
bridge vlan add vid 20 dev br0 self
@@ -118,7 +122,7 @@ setup_prepare()
h1_create
h2_create
- switch_create
+ switch_create 1
}
cleanup()
@@ -1357,6 +1361,98 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+check_group()
+{
+ local group=$1; shift
+ local vid=$1; shift
+ local should_fail=$1; shift
+ local when=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null |
+ grep -q "port $swp1"
+ check_err_fail "$should_fail" $? "$group seen $when snooping disable:"
+}
+
+__disable_test()
+{
+ local vid=$1; shift
+ local what=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ RET=0
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1
+ bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::3 \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2
+ bridge mdb add dev br0 port "$swp1" grp ff0e::4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.1
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.2
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ check_group ff0e::1 "$vid" 0 "before"
+ check_group ff0e::2 "$vid" 0 "before"
+ check_group ff0e::3 "$vid" 0 "before"
+ check_group ff0e::4 "$vid" 0 "before"
+
+ check_group 239.1.1.1 "$vid" 0 "before"
+ check_group 239.1.1.2 "$vid" 0 "before"
+ check_group 239.1.1.3 "$vid" 0 "before"
+ check_group 239.1.1.4 "$vid" 0 "before"
+
+ ip link set dev br0 type bridge mcast_snooping 0
+
+ check_group ff0e::1 "$vid" 0 "after"
+ check_group ff0e::2 "$vid" 0 "after"
+ check_group ff0e::3 "$vid" 1 "after"
+ check_group ff0e::4 "$vid" 1 "after"
+
+ check_group 239.1.1.1 "$vid" 0 "after"
+ check_group 239.1.1.2 "$vid" 0 "after"
+ check_group 239.1.1.3 "$vid" 1 "after"
+ check_group 239.1.1.4 "$vid" 1 "after"
+
+ log_test "$what: Flush after disable"
+
+ ip link set dev br0 type bridge mcast_snooping 1
+ sleep 10
+}
+
+disable_test()
+{
+ __disable_test 10 802.1q
+
+ switch_destroy
+ switch_create 0
+ setup_wait
+
+ __disable_test 0 802.1d
+
+ switch_destroy
+ switch_create 1
+ setup_wait
+}
+
if ! bridge mdb help 2>&1 | grep -q "flush"; then
echo "SKIP: iproute2 too old, missing bridge mdb flush support"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
deleted file mode 100755
index 4c5144c6f652..000000000000
--- a/tools/testing/selftests/net/gro.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly SERVER_MAC="aa:00:00:00:00:02"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
-readonly PROTOS=("ipv4" "ipv6" "ipip")
-dev=""
-test="all"
-proto="ipv4"
-
-run_test() {
- local server_pid=0
- local exit_code=0
- local protocol=$1
- local test=$2
- local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
- "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
-
- setup_ns
- # Each test is run 6 times to deflake, because given the receive timing,
- # not all packets that should coalesce will be considered in the same flow
- # on every try.
- for tries in {1..6}; do
- # Actual test starts here
- ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
- 1>>log.txt &
- server_pid=$!
- sleep 0.5 # to allow for socket init
- ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
- 1>>log.txt
- wait "${server_pid}"
- exit_code=$?
- if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
- ${exit_code} -ne 0 ]]; then
- echo "Ignoring errors due to slow environment" 1>&2
- exit_code=0
- fi
- if [[ "${exit_code}" -eq 0 ]]; then
- break;
- fi
- done
- cleanup_ns
- echo ${exit_code}
-}
-
-run_all_tests() {
- local failed_tests=()
- for proto in "${PROTOS[@]}"; do
- for test in "${TESTS[@]}"; do
- echo "running test ${proto} ${test}" >&2
- exit_code=$(run_test $proto $test)
- if [[ "${exit_code}" -ne 0 ]]; then
- failed_tests+=("${proto}_${test}")
- fi;
- done;
- done
- if [[ ${#failed_tests[@]} -ne 0 ]]; then
- echo "failed tests: ${failed_tests[*]}. \
- Please see log.txt for more logs"
- exit 1
- else
- echo "All Tests Succeeded!"
- fi;
-}
-
-usage() {
- echo "Usage: $0 \
- [-i <DEV>] \
- [-t data|ack|flags|tcp|ip|large] \
- [-p <ipv4|ipv6>]" 1>&2;
- exit 1;
-}
-
-while getopts "i:t:p:" opt; do
- case "${opt}" in
- i)
- dev="${OPTARG}"
- ;;
- t)
- test="${OPTARG}"
- ;;
- p)
- proto="${OPTARG}"
- ;;
- *)
- usage
- ;;
- esac
-done
-
-if [ -n "$dev" ]; then
- source setup_loopback.sh
-else
- source setup_veth.sh
-fi
-
-setup
-trap cleanup EXIT
-if [[ "${test}" == "all" ]]; then
- run_all_tests
-else
- exit_code=$(run_test "${proto}" "${test}")
- exit $exit_code
-fi;
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 76e604e4810e..7bfeeb133705 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_queue_init(512, &ring, 0);
if (ret)
- error(1, ret, "io_uring: queue init");
+ error(1, -ret, "io_uring: queue init");
iov.iov_base = payload;
iov.iov_len = cfg_payload_len;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret)
- error(1, ret, "io_uring: buffer registration");
+ error(1, -ret, "io_uring: buffer registration");
tstop = gettimeofday_ms() + cfg_runtime_ms;
do {
@@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_submit(&ring);
if (ret != cfg_nr_reqs)
- error(1, ret, "submit");
+ error(1, -ret, "submit");
if (cfg_cork)
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
for (i = 0; i < cfg_nr_reqs; i++) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->user_data != NONZC_TAG &&
cqe->user_data != ZC_TAG)
- error(1, -EINVAL, "invalid cqe->user_data");
+ error(1, EINVAL, "invalid cqe->user_data");
if (cqe->flags & IORING_CQE_F_NOTIF) {
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (compl_cqes <= 0)
- error(1, -EINVAL, "notification mismatch");
+ error(1, EINVAL, "notification mismatch");
compl_cqes--;
i--;
io_uring_cqe_seen(&ring);
@@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol)
}
if (cqe->flags & IORING_CQE_F_MORE) {
if (cqe->user_data != ZC_TAG)
- error(1, cqe->res, "unexpected F_MORE");
+ error(1, -cqe->res, "unexpected F_MORE");
compl_cqes++;
}
if (cqe->res >= 0) {
packets++;
bytes += cqe->res;
} else if (cqe->res != -EAGAIN) {
- error(1, cqe->res, "send failed");
+ error(1, -cqe->res, "send failed");
}
io_uring_cqe_seen(&ring);
}
@@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol)
while (compl_cqes) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (!(cqe->flags & IORING_CQE_F_NOTIF))
- error(1, -EINVAL, "missing notif flag");
+ error(1, EINVAL, "missing notif flag");
io_uring_cqe_seen(&ring);
compl_cqes--;
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index ce795bc0a1af..5339f56329e1 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -8,6 +8,7 @@ CFLAGS += -I../../
TEST_FILES := \
../../../../net/ynl \
../../../../../Documentation/netlink/specs \
+ ksft_setup_loopback.sh \
# end of TEST_FILES
TEST_GEN_FILES := \
diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
new file mode 100755
index 000000000000..3defbb1919c5
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Setup script for running ksft tests over a real interface in loopback mode.
+# This scripts replaces the historical setup_loopback.sh. It puts
+# a (presumably) real hardware interface into loopback mode, creates macvlan
+# interfaces on top and places them in a network namespace for isolation.
+#
+# NETIF env variable must be exported to indicate the real target device.
+# Note that the test will override NETIF with one of the macvlans, the
+# actual ksft test will only see the macvlans.
+#
+# Example use:
+# export NETIF=eth0
+# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py
+
+if [ -z "$NETIF" ]; then
+ echo "Error: NETIF variable not set"
+ exit 1
+fi
+if ! [ -d "/sys/class/net/$NETIF" ]; then
+ echo "Error: Can't find $NETIF, invalid netdevice"
+ exit 1
+fi
+
+# Save original settings for cleanup
+readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs"
+FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")"
+readonly FLUSH_TIMEOUT
+HARD_IRQS="$(< "${IRQ_PATH}")"
+readonly HARD_IRQS
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+readonly SERVER_NS
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+readonly CLIENT_NS
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# ksft expects addresses to communicate with remote
+export LOCAL_V6=2001:db8:1::1
+export REMOTE_V6=2001:db8:1::2
+
+cleanup() {
+ local exit_code=$?
+
+ echo "Cleaning up..."
+
+ # Remove macvlan interfaces and namespaces
+ ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true
+ ip netns del "${SERVER_NS}" 2>/dev/null || true
+ ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true
+ ip netns del "${CLIENT_NS}" 2>/dev/null || true
+
+ # Disable loopback
+ ethtool -K "${NETIF}" loopback off 2>/dev/null || true
+ sleep 1
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+
+ exit $exit_code
+}
+
+trap cleanup EXIT INT TERM
+
+# Enable loopback mode
+echo "Enabling loopback on ${NETIF}..."
+ethtool -K "${NETIF}" loopback on || {
+ echo "Failed to enable loopback mode"
+ exit 1
+}
+# The interface may need time to get carrier back, but selftests
+# will wait for carrier, so no need to wait / sleep here.
+
+# Use timer on host to trigger the network stack
+# Also disable device interrupt to not depend on NIC interrupt
+# Reduce test flakiness caused by unexpected interrupts
+echo 100000 >"${FLUSH_PATH}"
+echo 50 >"${IRQ_PATH}"
+
+# Create server namespace with macvlan
+ip netns add "${SERVER_NS}"
+ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan
+ip link set dev server netns "${SERVER_NS}"
+ip -netns "${SERVER_NS}" link set dev server up
+ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server
+ip -netns "${SERVER_NS}" link set dev lo up
+
+# Create client namespace with macvlan
+ip netns add "${CLIENT_NS}"
+ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan
+ip link set dev client netns "${CLIENT_NS}"
+ip -netns "${CLIENT_NS}" link set dev client up
+ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client
+ip -netns "${CLIENT_NS}" link set dev lo up
+
+echo "Setup complete!"
+echo " Device: ${NETIF}"
+echo " Server NS: ${SERVER_NS}"
+echo " Client NS: ${CLIENT_NS}"
+echo ""
+
+# Setup environment variables for tests
+export NETIF=server
+export REMOTE_TYPE=netns
+export REMOTE_ARGS="${CLIENT_NS}"
+
+# Run the command
+ip netns exec "${SERVER_NS}" "$@"
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 97b7cf2b20eb..40f9ce307dd1 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -8,7 +8,8 @@ from .consts import KSRC
from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
- ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit
+ ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \
+ ksft_variants, KsftNamedVariant
from .netns import NetNS, NetNSEnter
from .nsim import NetdevSim, NetdevSimDev
from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
@@ -21,7 +22,7 @@ __all__ = ["KSRC",
"ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
"ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
"ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
- "ksft_run", "ksft_exit",
+ "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant",
"NetNS", "NetNSEnter",
"CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
"bpftool", "ip", "ethtool", "bpftrace", "rand_port",
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 83b1574f7719..531e7fa1b3ea 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
-import builtins
import functools
import inspect
import signal
import sys
import time
import traceback
+from collections import namedtuple
from .consts import KSFT_MAIN_NAME
from .utils import global_defer_queue
@@ -136,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
time.sleep(sleep)
-def ktap_result(ok, cnt=1, case="", comment=""):
+def ktap_result(ok, cnt=1, case_name="", comment=""):
global KSFT_RESULT_ALL
KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
@@ -146,8 +146,8 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "ok "
res += str(cnt) + " "
res += KSFT_MAIN_NAME
- if case:
- res += "." + str(case.__name__)
+ if case_name:
+ res += "." + case_name
if comment:
res += " # " + comment
print(res, flush=True)
@@ -163,7 +163,7 @@ def ksft_flush_defer():
entry = global_defer_queue.pop()
try:
entry.exec_only()
- except:
+ except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
tb = traceback.format_exc()
for line in tb.strip().split('\n'):
@@ -171,6 +171,10 @@ def ksft_flush_defer():
KSFT_RESULT = False
+KsftCaseFunction = namedtuple("KsftCaseFunction",
+ ['name', 'original_func', 'variants'])
+
+
def ksft_disruptive(func):
"""
Decorator that marks the test as disruptive (e.g. the test
@@ -181,11 +185,47 @@ def ksft_disruptive(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not KSFT_DISRUPTIVE:
- raise KsftSkipEx(f"marked as disruptive")
+ raise KsftSkipEx("marked as disruptive")
return func(*args, **kwargs)
return wrapper
+class KsftNamedVariant:
+ """ Named string name + argument list tuple for @ksft_variants """
+
+ def __init__(self, name, *params):
+ self.params = params
+ self.name = name or "_".join([str(x) for x in self.params])
+
+
+def ksft_variants(params):
+ """
+ Decorator defining the sets of inputs for a test.
+ The parameters will be included in the name of the resulting sub-case.
+ Parameters can be either single object, tuple or a KsftNamedVariant.
+ The argument can be a list or a generator.
+
+ Example:
+
+ @ksft_variants([
+ (1, "a"),
+ (2, "b"),
+ KsftNamedVariant("three", 3, "c"),
+ ])
+ def my_case(cfg, a, b):
+ pass # ...
+
+ ksft_run(cases=[my_case], args=(cfg, ))
+
+ Will generate cases:
+ my_case.1_a
+ my_case.2_b
+ my_case.three
+ """
+
+ return lambda func: KsftCaseFunction(func.__name__, func, params)
+
+
def ksft_setup(env):
"""
Setup test framework global state from the environment.
@@ -199,7 +239,7 @@ def ksft_setup(env):
return False
try:
return bool(int(value))
- except:
+ except Exception:
raise Exception(f"failed to parse {name}")
if "DISRUPTIVE" in env:
@@ -220,9 +260,13 @@ def _ksft_intr(signum, frame):
ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
-def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+def _ksft_generate_test_cases(cases, globs, case_pfx, args):
+ """Generate a flat list of (func, args, name) tuples"""
+
cases = cases or []
+ test_cases = []
+ # If using the globs method find all relevant functions
if globs and case_pfx:
for key, value in globs.items():
if not callable(value):
@@ -232,6 +276,27 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ for func in cases:
+ if isinstance(func, KsftCaseFunction):
+ # Parametrized test - create case for each param
+ for param in func.variants:
+ if not isinstance(param, KsftNamedVariant):
+ if not isinstance(param, tuple):
+ param = (param, )
+ param = KsftNamedVariant(None, *param)
+
+ test_cases.append((func.original_func,
+ (*args, *param.params),
+ func.name + "." + param.name))
+ else:
+ test_cases.append((func, args, func.__name__))
+
+ return test_cases
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args)
+
global term_cnt
term_cnt = 0
prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
@@ -239,19 +304,19 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
print("TAP version 13", flush=True)
- print("1.." + str(len(cases)), flush=True)
+ print("1.." + str(len(test_cases)), flush=True)
global KSFT_RESULT
cnt = 0
stop = False
- for case in cases:
+ for func, args, name in test_cases:
KSFT_RESULT = True
cnt += 1
comment = ""
cnt_key = ""
try:
- case(*args)
+ func(*args)
except KsftSkipEx as e:
comment = "SKIP " + str(e)
cnt_key = 'skip'
@@ -268,12 +333,26 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
KSFT_RESULT = False
cnt_key = 'fail'
- ksft_flush_defer()
+ try:
+ ksft_flush_defer()
+ except BaseException as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ if isinstance(e, KeyboardInterrupt):
+ ksft_pr()
+ ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
+ ksft_pr(" Attempting to finish cleanup before exiting.")
+ ksft_pr(" Interrupt again to exit immediately.")
+ ksft_pr()
+ stop = True
+ # Flush was interrupted, try to finish the job best we can
+ ksft_flush_defer()
if not cnt_key:
cnt_key = 'pass' if KSFT_RESULT else 'fail'
- ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+ ktap_result(KSFT_RESULT, cnt, name, comment=comment)
totals[cnt_key] += 1
if stop:
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
index 1a8cbe9acc48..7c640ed64c0b 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -27,7 +27,7 @@ class NetdevSim:
self.port_index = port_index
self.ns = ns
self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
- ret = ip("-j link show dev %s" % ifname, ns=ns)
+ ret = ip("-d -j link show dev %s" % ifname, ns=ns)
self.dev = json.loads(ret.stdout)[0]
self.ifindex = self.dev["ifindex"]
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index cb40ecef9456..106ee1f2df86 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -32,7 +32,7 @@ class cmd:
Use bkg() instead to run a command in the background.
"""
def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
- host=None, timeout=5, ksft_wait=None):
+ host=None, timeout=5, ksft_ready=None, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
@@ -52,21 +52,25 @@ class cmd:
# ksft_wait lets us wait for the background process to fully start,
# we pass an FD to the child process, and wait for it to write back.
# Similarly term_fd tells child it's time to exit.
- pass_fds = ()
+ pass_fds = []
env = os.environ.copy()
if ksft_wait is not None:
- rfd, ready_fd = os.pipe()
wait_fd, self.ksft_term_fd = os.pipe()
- pass_fds = (ready_fd, wait_fd, )
- env["KSFT_READY_FD"] = str(ready_fd)
+ pass_fds.append(wait_fd)
env["KSFT_WAIT_FD"] = str(wait_fd)
+ ksft_ready = True # ksft_wait implies ready
+ if ksft_ready is not None:
+ rfd, ready_fd = os.pipe()
+ pass_fds.append(ready_fd)
+ env["KSFT_READY_FD"] = str(ready_fd)
self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, pass_fds=pass_fds,
env=env)
if ksft_wait is not None:
- os.close(ready_fd)
os.close(wait_fd)
+ if ksft_ready is not None:
+ os.close(ready_fd)
msg = fd_read_timeout(rfd, ksft_wait)
os.close(rfd)
if not msg:
@@ -116,10 +120,10 @@ class bkg(cmd):
with bkg("my_binary", ksft_wait=5):
"""
def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
- exit_wait=False, ksft_wait=None):
+ exit_wait=False, ksft_ready=None, ksft_wait=None):
super().__init__(comm, background=True,
shell=shell, fail=fail, ns=ns, host=host,
- ksft_wait=ksft_wait)
+ ksft_ready=ksft_ready, ksft_wait=ksft_wait)
self.terminate = not exit_wait and not ksft_wait
self._exit_wait = exit_wait
self.check_fail = fail
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index c368fc045f4b..64f05229ab24 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -332,7 +332,7 @@ static __u16 csum_fold_helper(__u32 csum)
}
static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
- __u32 hdr_len)
+ unsigned long hdr_len)
{
char tmp_buff[MAX_ADJST_OFFSET];
__u32 buff_pos, udp_csum = 0;
@@ -422,8 +422,9 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
{
struct udphdr *udph = NULL;
__s32 *adjust_offset, *val;
- __u32 key, hdr_len;
+ unsigned long hdr_len;
void *offset_ptr;
+ __u32 key;
__u8 tag;
int ret;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index fc7e22b503d3..404a77bf366a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1072,6 +1072,8 @@ static void check_getpeername_connect(int fd)
socklen_t salen = sizeof(ss);
char a[INET6_ADDRSTRLEN];
char b[INET6_ADDRSTRLEN];
+ const char *iface;
+ size_t len;
if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
perror("getpeername");
@@ -1081,7 +1083,13 @@ static void check_getpeername_connect(int fd)
xgetnameinfo((struct sockaddr *)&ss, salen,
a, sizeof(a), b, sizeof(b));
- if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+ iface = strchr(cfg_host, '%');
+ if (iface)
+ len = iface - cfg_host;
+ else
+ len = strlen(cfg_host) + 1;
+
+ if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
cfg_host, a, cfg_port, b);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9b7b93f8eb0c..a6447f7a31fe 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -375,81 +375,75 @@ do_transfer()
local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
local capopt="-i any -s 65535 -B 32768 ${capuser}"
- ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ ip netns exec ${listener_ns} tcpdump ${capopt} \
+ -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
local cappid_listener=$!
- ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
- local cappid_connector=$!
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ ip netns exec ${connector_ns} tcpdump ${capopt} \
+ -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+ fi
sleep 1
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
+ mptcp_lib_nstat_init "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
- fi
-
- local stat_synrx_last_l
- local stat_ackrx_last_l
- local stat_cookietx_last
- local stat_cookierx_last
- local stat_csum_err_s
- local stat_csum_err_c
- local stat_tcpfb_last_l
- stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_args $local_addr < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${connector_ns}"
+ fi
+
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
local stop
stop=$(date +%s%3N)
if $capture; then
sleep 1
kill ${cappid_listener}
- kill ${cappid_connector}
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ kill ${cappid_connector}
+ fi
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${connector_ns}"
fi
local duration
duration=$((stop-start))
printf "(duration %05sms) " "${duration}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
- "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
echo
cat "$capout"
@@ -463,38 +457,38 @@ do_transfer()
rets=$?
local extra=""
- local stat_synrx_now_l
- local stat_ackrx_now_l
- local stat_cookietx_now
- local stat_cookierx_now
- local stat_ooo_now
- local stat_tcpfb_now_l
- stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
- stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- expect_synrx=$((stat_synrx_last_l))
- expect_ackrx=$((stat_ackrx_last_l))
+ local stat_synrx
+ local stat_ackrx
+ local stat_cookietx
+ local stat_cookierx
+ local stat_ooo
+ local stat_tcpfb
+ stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+ expect_synrx=0
+ expect_ackrx=0
cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+ expect_synrx=${connect_per_transfer}
+ expect_ackrx=${connect_per_transfer}
fi
- if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ if [ ${stat_synrx} -lt ${expect_synrx} ]; then
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \
"than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
- if [ ${stat_ooo_now} -eq 0 ]; then
- mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then
+ if [ ${stat_ooo} -eq 0 ]; then
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \
"than expected (${expect_ackrx})"
rets=1
else
@@ -508,47 +502,45 @@ do_transfer()
csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
- if [ $csum_err_s_nr -gt 0 ]; then
- mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+ if [ $csum_err_s -gt 0 ]; then
+ mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]"
rets=1
fi
- local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
- if [ $csum_err_c_nr -gt 0 ]; then
- mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+ if [ $csum_err_c -gt 0 ]; then
+ mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]"
retc=1
fi
fi
- if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then
mptcp_lib_pr_fail "unexpected fallback to TCP"
rets=1
fi
if [ $cookies -eq 2 ];then
- if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -eq 0 ] ;then
extra+=" WARN: CookieSent: did not advance"
fi
- if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -eq 0 ] ;then
extra+=" WARN: CookieRecv: did not advance"
fi
else
- if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -gt 0 ] ;then
extra+=" WARN: CookieSent: changed"
fi
- if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -gt 0 ] ;then
extra+=" WARN: CookieRecv: changed"
fi
fi
- if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+ if [ ${stat_synrx} -gt ${expect_synrx} ]; then
extra+=" WARN: SYNRX: expect ${expect_synrx},"
- extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_synrx} (probably retransmissions)"
fi
- if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+ if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then
extra+=" WARN: ACKRX: expect ${expect_ackrx},"
- extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_ackrx} (probably retransmissions)"
fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 43f31f8d587f..b2e6e548f796 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset bind_addr
unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
@@ -645,6 +646,27 @@ wait_mpj()
done
}
+wait_ll_ready()
+{
+ local ns="${1}"
+
+ local i
+ for i in $(seq 50); do
+ ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
+ grep -qw "tentative" || break
+ sleep 0.1
+ done
+}
+
+get_ll_addr()
+{
+ local ns="${1}"
+ local iface="${2}"
+
+ ip -n "${ns}" -6 addr show dev "${iface}" scope link |
+ grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#'
+}
+
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
@@ -951,6 +973,9 @@ do_transfer()
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
local speed=${speed:-"fast"}
+ local bind_addr=${bind_addr:-"::"}
+ local listener_in="${sin}"
+ local connector_in="${cin}"
port=$(get_port)
:> "$cout"
@@ -958,10 +983,8 @@ do_transfer()
cond_start_capture ${listener_ns}
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
local extra_args
if [ $speed = "fast" ]; then
@@ -999,42 +1022,40 @@ do_transfer()
extra_srv_args="$extra_args $extra_srv_args"
if [ "$test_linkfail" -gt 1 ];then
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sinfail" > "$sout" &
- else
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sin" > "$sout" &
+ listener_in="${sinfail}"
fi
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
+ ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
extra_cl_args="$extra_args $extra_cl_args"
if [ "$test_linkfail" -eq 0 ];then
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr < "$cin" > "$cout" &
elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+ connector_in="${cinsent}"
( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
- timeout ${timeout_test} \
ip netns exec ${connector_ns} \
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_cl_args $connect_addr > "$cout" &
else
+ connector_in="${cinsent}"
tee "$cinsent" < "$cinfail" | \
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
fi
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
check_cestab $listener_ns $connector_ns
@@ -1043,31 +1064,26 @@ do_transfer()
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
cond_stop_capture
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
fail_test "client exit code $retc, server $rets"
- mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
- "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
return 1
fi
- if [ "$test_linkfail" -gt 1 ];then
- check_transfer $sinfail $cout "file received by client" $trunc_size
- else
- check_transfer $sin $cout "file received by client" $trunc_size
- fi
+ check_transfer $listener_in $cout "file received by client" $trunc_size
retc=$?
- if [ "$test_linkfail" -eq 0 ];then
- check_transfer $cin $sout "file received by server" $trunc_size
- else
- check_transfer $cinsent $sout "file received by server" $trunc_size
- fi
+ check_transfer $connector_in $sout "file received by server" $trunc_size
rets=$?
[ $retc -eq 0 ] && [ $rets -eq 0 ]
@@ -1136,12 +1152,20 @@ run_tests()
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
}
+_dump_stats()
+{
+ local ns="${1}"
+ local side="${2}"
+
+ mptcp_lib_print_err "${side} ns stats (${ns2})"
+ mptcp_lib_pr_nstat "${ns}"
+ echo
+}
+
dump_stats()
{
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep Tcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep Tcp
+ _dump_stats "${ns1}" "Server"
+ _dump_stats "${ns2}" "Client"
}
chk_csum_nr()
@@ -2952,7 +2976,11 @@ mixed_tests()
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
speed=slow \
run_tests $ns1 $ns2 dead:beef:2::1
- chk_join_nr 1 1 1
+ if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
+ chk_join_nr 0 0 0
+ else
+ chk_join_nr 1 1 1
+ fi
fi
# fullmesh still tries to create all the possibly subflows with
@@ -3233,6 +3261,133 @@ add_addr_ports_tests()
fi
}
+bind_tests()
+{
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v4, no join v4"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v6, no join v6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v4"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP 10.0.2.1 &
+ extra_bind=$!
+
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v6"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP dead:beef:2::1 &
+ extra_bind=$!
+
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses: v6 LL case
+ if reset "multiple bind to allow joins v6 link-local routing"; then
+ local extra_bind ns1ll1 ns1ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ # it is not possible to connect to the announced LL addr without
+ # specifying the outgoing interface.
+ join_connect_err=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to v6 LL addresses: laminar
+ if reset "multiple bind to allow joins v6 link-local laminar" &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ local extra_bind ns1ll1 ns1ll2 ns2ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+ ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+ pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+}
+
syncookies_tests()
{
# single subflow, syncookies
@@ -4192,6 +4347,7 @@ all_tests_sorted=(
M@mixed_tests
b@backup_tests
p@add_addr_ports_tests
+ B@bind_tests
k@syncookies_tests
S@checksum_tests
d@deny_join_id0_tests
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index f4388900016a..5fea7e7df628 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -106,23 +106,32 @@ mptcp_lib_pr_info() {
mptcp_lib_print_info "INFO: ${*}"
}
-# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file
+mptcp_lib_pr_nstat() {
+ local ns="${1}"
+ local hist="/tmp/${ns}.out"
+
+ if [ -f "${hist}" ]; then
+ awk '$2 != 0 { print " "$0 }' "${hist}"
+ else
+ ip netns exec "${ns}" nstat -as | grep Tcp
+ fi
+}
+
+# $1-2: listener/connector ns ; $3 port
mptcp_lib_pr_err_stats() {
local lns="${1}"
local cns="${2}"
local port="${3}"
- local lstat="${4}"
- local cstat="${5}"
echo -en "${MPTCP_LIB_COLOR_RED}"
{
printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
- cat "${lstat}"
+ mptcp_lib_pr_nstat "${lns}"
printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
- [ "${lstat}" != "${cstat}" ] && cat "${cstat}"
+ [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}"
} 1>&2
echo -en "${MPTCP_LIB_COLOR_RESET}"
}
@@ -341,6 +350,19 @@ mptcp_lib_evts_get_info() {
mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}
+mptcp_lib_wait_timeout() {
+ local timeout_test="${1}"
+ local listener_ns="${2}"
+ local connector_ns="${3}"
+ local port="${4}"
+ shift 4 # rest are PIDs
+
+ sleep "${timeout_test}"
+ mptcp_lib_print_err "timeout"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
+ kill "${@}" 2>/dev/null
+}
+
# $1: PID
mptcp_lib_kill_wait() {
[ "${1}" -eq 0 ] && return 0
@@ -376,14 +398,36 @@ mptcp_lib_is_v6() {
[ -z "${1##*:*}" ]
}
+mptcp_lib_nstat_init() {
+ local ns="${1}"
+
+ rm -f "/tmp/${ns}."{nstat,out}
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n
+}
+
+mptcp_lib_nstat_get() {
+ local ns="${1}"
+
+ # filter out non-*TCP stats, and the rate (last column)
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz |
+ grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out"
+}
+
# $1: ns, $2: MIB counter
+# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available.
+# If not, get the counter from nstat ignoring any history.
mptcp_lib_get_counter() {
local ns="${1}"
local counter="${2}"
+ local hist="/tmp/${ns}.out"
local count
- count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
- awk 'NR==1 {next} {print $2}')
+ if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then
+ count=$(awk "/^${counter} / {print \$2; exit}" "${hist}")
+ else
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ fi
if [ -z "${count}" ]; then
mptcp_lib_fail_if_expected_feature "${counter} counter"
return 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index f01989be6e9b..ab8bce06b262 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -169,41 +169,44 @@ do_transfer()
cmsg+=",TCPINQ"
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
-
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
- ${local_addr} < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
+
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+ ${local_addr} < "$sin" > "$sout" &
local spid=$!
- sleep 1
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
- $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+ $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
print_title "Transfer ${ip:2}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
- "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
mptcp_lib_result_fail "transfer ${ip}"
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 1903e8e84a31..806aaa7d2d61 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -155,48 +155,53 @@ do_transfer()
sleep 1
fi
- NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
- nstat -n
- NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
- nstat -n
-
- timeout ${timeout_test} \
- ip netns exec ${ns3} \
- ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
- 0.0.0.0 < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${ns3}"
+ mptcp_lib_nstat_init "${ns1}"
+
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+ 0.0.0.0 < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${ns1} \
- ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
- 10.0.3.3 < "$cin" > "$cout" &
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+ 10.0.3.3 < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \
+ "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
if $capture; then
sleep 1
kill ${cappid_listener}
kill ${cappid_connector}
fi
- NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
- nstat | grep Tcp > /tmp/${ns3}.out
- NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
- nstat | grep Tcp > /tmp/${ns1}.out
+ mptcp_lib_nstat_get "${ns3}"
+ mptcp_lib_nstat_get "${ns1}"
cmp $sin $cout > /dev/null 2>&1
local cmps=$?
cmp $cin $sout > /dev/null 2>&1
local cmpc=$?
- if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
- [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] &&
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] &&
+ [ $timeout_pid -eq 0 ]; then
printf "%-16s" " max $max_time "
mptcp_lib_pr_ok
cat "$capout"
@@ -204,8 +209,7 @@ do_transfer()
fi
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \
- "/tmp/${ns3}.out" "/tmp/${ns1}.out"
+ mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}"
ls -l $sin $cout
ls -l $cin $sout
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 87323942cb8a..e9ae1806ab07 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -211,7 +211,8 @@ make_connection()
ip netns exec "$ns1" \
./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
local server_pid=$!
- sleep 0.5
+
+ mptcp_lib_wait_local_port_listen "${ns1}" "${port}"
# Run the client, transfer $file and stay connected to the server
# to conduct tests
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index 45832df98295..a68bc882fa4e 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -127,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
for i in 0 1; do
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
@@ -153,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1
ip -net "$nsr1" route add default via 192.168.10.2
+ip -6 -net "$nsr1" route add default via fee1:2::2
ip -net "$nsr2" route add default via 192.168.10.1
+ip -6 -net "$nsr2" route add default via fee1:2::1
ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
@@ -352,8 +356,9 @@ test_tcp_forwarding_ip()
local nsa=$1
local nsb=$2
local pmtu=$3
- local dstip=$4
- local dstport=$5
+ local proto=$4
+ local dstip=$5
+ local dstport=$6
local lret=0
local socatc
local socatl
@@ -363,12 +368,14 @@ test_tcp_forwarding_ip()
infile="$nsin_small"
fi
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
+ TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
+ TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
socatc=$?
wait $lpid
@@ -394,8 +401,11 @@ test_tcp_forwarding_ip()
test_tcp_forwarding()
{
local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
- test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
return $?
}
@@ -403,6 +413,9 @@ test_tcp_forwarding()
test_tcp_forwarding_set_dscp()
{
local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -413,7 +426,7 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
@@ -430,7 +443,7 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
check_dscp "dscp_egress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
@@ -441,7 +454,7 @@ fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
check_dscp "dscp_fwd" "$pmtu"
}
@@ -455,7 +468,7 @@ test_tcp_forwarding_nat()
[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
- test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
lret=$?
if [ "$lret" -eq 0 ] ; then
@@ -465,7 +478,7 @@ test_tcp_forwarding_nat()
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with dnat $what"
@@ -487,7 +500,7 @@ make_file "$nsin_small" "$filesize_small"
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2" 0; then
+if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -495,6 +508,14 @@ else
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
+ echo "PASS: IPv6 flow offloaded for ns1/ns2"
+else
+ echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
@@ -520,7 +541,7 @@ table ip nat {
EOF
check_dscp "dscp_none" "0"
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -546,7 +567,7 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
exit 0
fi
@@ -558,6 +579,73 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
ip netns exec "$nsr1" nft list ruleset
fi
+# IPIP tunnel test:
+# Add IPIP tunnel interfaces and check flowtable acceleration.
+test_ipip() {
+if ! ip -net "$nsr1" link add name tun0 type ipip \
+ local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
+ echo "SKIP: could not add ipip tunnel"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" link set tun0 up
+ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
+ip -net "$nsr2" link set tun0 up
+ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr1" route change default via 192.168.100.2
+ip -net "$nsr2" route change default via 192.168.100.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward \
+ 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Create vlan tagged devices for IPIP traffic.
+ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
+ip -net "$nsr1" link set veth1.10 up
+ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
+ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun1 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+ip -net "$nsr1" route change default via 192.168.200.2
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+
+ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr2" link set veth0.10 up
+ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun1 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+ip -net "$nsr2" route change default via 192.168.200.1
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Restore the previous configuration
+ip -net "$nsr1" route change default via 192.168.10.2
+ip -net "$nsr2" route change default via 192.168.10.1
+ip -net "$ns2" route del default via 10.0.2.1
+}
+
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
test_bridge() {
@@ -643,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
ip -net "$nsr1" link set up dev veth0
}
+test_ipip
+
test_bridge
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
@@ -683,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2" 1; then
+if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -691,6 +781,14 @@ else
ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
+else
+ echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
if [ "$1" = "" ]; then
low=1280
mtu=$((65536 - low))
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..b282d1785c9b 100644
--- a/tools/testing/selftests/net/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -9,9 +9,10 @@
int main(int argc, char *argv[])
{
struct sockaddr_in saddr = {}, daddr = {};
- int sd, ret, len = sizeof(daddr);
+ socklen_t len = sizeof(daddr);
struct timeval tv = {25, 0};
char buf[] = "hello";
+ int sd, ret;
if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
index 7618ebe528a4..679b6c77ace7 100644
--- a/tools/testing/selftests/net/netlink-dumps.c
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -143,6 +143,7 @@ TEST(dump_extack)
EXPECT_EQ(n, -1);
EXPECT_EQ(errno, ENOBUFS);
+ ret = NO_CTRL;
for (i = 0; i < cnt; i++) {
struct ext_ack ea = {};
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
new file mode 100644
index 000000000000..47550df124ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test SYN+ACK RTX with 1s RTO.
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000`
+
+//
+// Test 1: TFO SYN+ACK
+//
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+
+//
+// Test 2: non-TFO SYN+ACK
+//
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
index b2b2cdf27e20..454441e7ecff 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
// Test that we correctly skip zero-length IOVs.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
+
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
index 183051ba0cae..6882b8240a8a 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -23,14 +23,16 @@
// install a qdisc dropping all packets
+0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+
+0 write(4, ..., 24) = 24
// When qdisc is congested we retry every 500ms
// (TCP_RESOURCE_PROBE_INTERVAL) and therefore
// we retry 6 times before hitting 3s timeout.
// First verify that the connection is alive:
-+3.250 write(4, ..., 24) = 24
++3 write(4, ..., 24) = 24
+
// Now verify that shortly after that the socket is dead:
- +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
+0 %{ assert tcpi_probes == 6, tcpi_probes; \
assert tcpi_backoff == 0, tcpi_backoff }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
index a82c8899d36b..0a0700afdaa3 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -4,6 +4,8 @@
// send a packet with MSG_ZEROCOPY and receive the notification ID
// repeat and verify IDs are consecutive
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
index c01915e7f4a1..df91675d2991 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -3,6 +3,8 @@
//
// send multiple packets, then read one range of all notifications.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
index 6509882932e9..2963cfcb14df 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Minimal client-side zerocopy test
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
index 2cd78755cb2a..ea0c2fa73c2d 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -7,6 +7,8 @@
// First send on a closed socket and wait for (absent) notification.
// Then connect and send and verify that notification nr. is zero.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
index 7671c20e01cf..4df978a9b82e 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -7,6 +7,9 @@
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
index fadc480fdb7f..36b6edc4858c 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -8,6 +8,9 @@
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
index 5bfa0d1d2f4a..1bea6f3b4558 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -8,6 +8,9 @@
// is correctly fired only once, when EPOLLONESHOT is set. send another packet
// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
// confirm that EPOLLERR is correctly set.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
index 4a73bbf46961..e27c21ff5d18 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -8,6 +8,8 @@
// one will have no data in the initial send. On return 0 the
// zerocopy notification counter is not incremented. Verify this too.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
// Send a FastOpen request, no cookie yet so no data in SYN
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
index 36086c5877ce..b1fa77c77dfa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -4,6 +4,8 @@
// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
// kernel returns the notification ID.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh
./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
index 672f817faca0..2f5317d0a9fa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -7,6 +7,8 @@
// because each iovec element becomes a frag
// 3) the PSH bit is set on an skb when it runs out of fragments
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
index a9a1ac0aea4f..9d5272c6b207 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -4,6 +4,8 @@
// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
// packets of all sizes, including the smallest payload, 1B.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 163a084d525d..248c2b91fe42 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
kci_test_polrouting
kci_test_route_get
kci_test_addrlft
+ kci_test_addrlft_route_cleanup
kci_test_promote_secondaries
kci_test_tc
kci_test_gre
@@ -323,6 +324,25 @@ kci_test_addrlft()
end_test "PASS: preferred_lft addresses have expired"
}
+kci_test_addrlft_route_cleanup()
+{
+ local ret=0
+ local test_addr="2001:db8:99::1/64"
+ local test_prefix="2001:db8:99::/64"
+
+ run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300
+ run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy"
+ run_cmd ip -6 addr del $test_addr dev "$devdummy"
+ run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy"
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: route not cleaned up when address with valid_lft deleted"
+ return 1
+ fi
+
+ end_test "PASS: route cleaned up when address with valid_lft deleted"
+}
+
kci_test_promote_secondaries()
{
run_cmd ifconfig "$devdummy"
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
deleted file mode 100644
index 2070b57849de..000000000000
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
-readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
-readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
-readonly HARD_IRQS="$(< ${IRQ_PATH})"
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-netdev_check_for_carrier() {
- local -r dev="$1"
-
- for i in {1..5}; do
- carrier="$(cat /sys/class/net/${dev}/carrier)"
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "carrier not ready yet..." >&2
- sleep 1
- else
- echo "carrier ready" >&2
- break
- fi
- done
- echo "${carrier}"
-}
-
-# Assumes that there is no existing ipvlan device on the physical device
-setup_loopback_environment() {
- local dev="$1"
-
- # Fail hard if cannot turn on loopback mode for current NIC
- ethtool -K "${dev}" loopback on || exit 1
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_macvlan_ns(){
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
- local -r addr="$5"
-
- ip link add link "${link_dev}" dev "${ns_dev}" \
- address "${ns_mac}" type macvlan
- exit_code=$?
- if [[ "${exit_code}" -ne 0 ]]; then
- echo "setup_macvlan_ns failed"
- exit $exit_code
- fi
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- ip link set dev "${ns_dev}" netns "${ns_name}"
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
- if [[ -n "${addr}" ]]; then
- ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
- fi
-
- sleep 1
-}
-
-cleanup_macvlan_ns(){
- while (( $# >= 2 )); do
- ns_name="$1"
- ns_dev="$2"
- ip -netns "${ns_name}" link del dev "${ns_dev}"
- ip netns del "${ns_name}"
- shift 2
- done
-}
-
-cleanup_loopback(){
- local -r dev="$1"
-
- ethtool -K "${dev}" loopback off
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_interrupt() {
- # Use timer on host to trigger the network stack
- # Also disable device interrupt to not depend on NIC interrupt
- # Reduce test flakiness caused by unexpected interrupts
- echo 100000 >"${FLUSH_PATH}"
- echo 50 >"${IRQ_PATH}"
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- cleanup_macvlan_ns ${server_ns} server ${client_ns} client
-}
-
-setup() {
- setup_loopback_environment "${dev}"
- setup_interrupt
-}
-
-cleanup() {
- cleanup_loopback "${dev}"
-
- echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
- echo "${HARD_IRQS}" >"${IRQ_PATH}"
-}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
deleted file mode 100644
index 152bf4c65747..000000000000
--- a/tools/testing/selftests/net/setup_veth.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-setup_veth_ns() {
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
- echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs"
- ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
-
- ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- ip link add name server type veth peer name client
-
- setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- local ns_name
-
- for ns_name in ${client_ns} ${server_ns}; do
- [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
- done
-}
-
-setup() {
- # no global init setup step needed
- :
-}
-
-cleanup() {
- cleanup_ns
-}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 8457b7ccbc09..b76df1efc2ef 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt)
msg.msg_controllen = sizeof(control);
while (1) {
- const char *reason;
+ const char *reason = NULL;
ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 5c6d8215021c..da1b50b30719 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -2856,6 +2856,147 @@ TEST_F(tls_err, oob_pressure)
EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
}
+/*
+ * Parse a stream of TLS records and ensure that each record respects
+ * the specified @max_payload_len.
+ */
+static size_t parse_tls_records(struct __test_metadata *_metadata,
+ const __u8 *rx_buf, int rx_len, int overhead,
+ __u16 max_payload_len)
+{
+ const __u8 *rec = rx_buf;
+ size_t total_plaintext_rx = 0;
+ const __u8 rec_header_len = 5;
+
+ while (rec < rx_buf + rx_len) {
+ __u16 record_payload_len;
+ __u16 plaintext_len;
+
+ /* Sanity check that it's a TLS header for application data */
+ ASSERT_EQ(rec[0], 23);
+ ASSERT_EQ(rec[1], 0x3);
+ ASSERT_EQ(rec[2], 0x3);
+
+ memcpy(&record_payload_len, rec + 3, 2);
+ record_payload_len = ntohs(record_payload_len);
+ ASSERT_GE(record_payload_len, overhead);
+
+ plaintext_len = record_payload_len - overhead;
+ total_plaintext_rx += plaintext_len;
+
+ /* Plaintext must not exceed the specified limit */
+ ASSERT_LE(plaintext_len, max_payload_len);
+ rec += rec_header_len + record_payload_len;
+ }
+
+ return total_plaintext_rx;
+}
+
+TEST(tls_12_tx_max_payload_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 limit = 128;
+ __u16 opt = 0;
+ unsigned int optlen = sizeof(opt);
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(limit, opt);
+ EXPECT_EQ(optlen, sizeof(limit));
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx));
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ limit);
+
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
+TEST(tls_12_tx_max_payload_len_open_rec)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 tx_partial = 256;
+ __u16 og_limit = 512, limit = 128;
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit,
+ sizeof(og_limit));
+ ASSERT_EQ(ret, 0);
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial);
+
+ /*
+ * Changing the payload limit with a pending open record should
+ * not be allowed.
+ */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBUSY);
+
+ ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR),
+ sizeof(tx) - tx_partial);
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ og_limit);
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
deleted file mode 100755
index 8ff172f7bb1b..000000000000
--- a/tools/testing/selftests/net/toeplitz.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
-# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
-# ('-rps <rps_map>')
-#
-# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
-# which is a driver-specific encoding.
-#
-# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
-# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
-
-source setup_loopback.sh
-readonly SERVER_IP4="192.168.1.200/24"
-readonly SERVER_IP6="fda8::1/64"
-readonly SERVER_MAC="aa:00:00:00:00:02"
-
-readonly CLIENT_IP4="192.168.1.100/24"
-readonly CLIENT_IP6="fda8::2/64"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-
-PORT=8000
-KEY="$(</proc/sys/net/core/netdev_rss_key)"
-TEST_RSS=false
-RPS_MAP=""
-PROTO_FLAG=""
-IP_FLAG=""
-DEV="eth0"
-
-# Return the number of rxqs among which RSS is configured to spread packets.
-# This is determined by reading the RSS indirection table using ethtool.
-get_rss_cfg_num_rxqs() {
- echo $(ethtool -x "${DEV}" |
- grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
- cut -d: -f2- |
- awk '{$1=$1};1' |
- tr ' ' '\n' |
- sort -u |
- wc -l)
-}
-
-# Return a list of the receive irq handler cpus.
-# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
-# Reads /sys/kernel/irq/ in order, so algorithm depends on
-# irq_{rxq-0} < irq_{rxq-1}, etc.
-get_rx_irq_cpus() {
- CPUS=""
- # sort so that irq 2 is read before irq 10
- SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
- # Consider only as many queues as RSS actually uses. We assume that
- # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
- RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
- RXQ_COUNT=0
-
- for i in ${SORTED_IRQS}
- do
- [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
- # lookup relevant IRQs by action name
- [[ -e "$i/actions" ]] || continue
- cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
- irqname=$(<"$i/actions")
-
- # does the IRQ get called
- irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
- [[ -n "${irqcount}" ]] || continue
-
- # lookup CPU
- irq=$(basename "$i")
- cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
-
- if [[ -z "${CPUS}" ]]; then
- CPUS="${cpu}"
- else
- CPUS="${CPUS},${cpu}"
- fi
- RXQ_COUNT=$((RXQ_COUNT+1))
- done
-
- echo "${CPUS}"
-}
-
-get_disable_rfs_cmd() {
- echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
-}
-
-get_set_rps_bitmaps_cmd() {
- CMD=""
- for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
- do
- CMD="${CMD} echo $1 > ${i};"
- done
-
- echo "${CMD}"
-}
-
-get_disable_rps_cmd() {
- echo "$(get_set_rps_bitmaps_cmd 0)"
-}
-
-die() {
- echo "$1"
- exit 1
-}
-
-check_nic_rxhash_enabled() {
- local -r pattern="receive-hashing:\ on"
-
- ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
-}
-
-parse_opts() {
- local prog=$0
- shift 1
-
- while [[ "$1" =~ "-" ]]; do
- if [[ "$1" = "-irq_prefix" ]]; then
- shift
- IRQ_PATTERN="^$1-[0-9]*$"
- elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
- PROTO_FLAG="$1"
- elif [[ "$1" = "-4" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP4}"
- CLIENT_IP="${CLIENT_IP4}"
- elif [[ "$1" = "-6" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP6}"
- CLIENT_IP="${CLIENT_IP6}"
- elif [[ "$1" = "-rss" ]]; then
- TEST_RSS=true
- elif [[ "$1" = "-rps" ]]; then
- shift
- RPS_MAP="$1"
- elif [[ "$1" = "-i" ]]; then
- shift
- DEV="$1"
- else
- die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
- [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
- fi
- shift
- done
-}
-
-setup() {
- setup_loopback_environment "${DEV}"
-
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${DEV}" $server_ns server \
- "${SERVER_MAC}" "${SERVER_IP}"
- setup_macvlan_ns "${DEV}" $client_ns client \
- "${CLIENT_MAC}" "${CLIENT_IP}"
-}
-
-cleanup() {
- cleanup_macvlan_ns $server_ns server $client_ns client
- cleanup_loopback "${DEV}"
-}
-
-parse_opts $0 $@
-
-setup
-trap cleanup EXIT
-
-check_nic_rxhash_enabled
-
-# Actual test starts here
-if [[ "${TEST_RSS}" = true ]]; then
- # RPS/RFS must be disabled because they move packets between cpus,
- # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
- eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -C "$(get_rx_irq_cpus)" -s -v &
-elif [[ ! -z "${RPS_MAP}" ]]; then
- eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -r "0x${RPS_MAP}" -s -v &
-else
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
-fi
-
-server_pid=$!
-
-ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
- "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
-
-client_pid=$!
-
-wait "${server_pid}"
-exit_code=$?
-kill -9 "${client_pid}"
-if [[ "${exit_code}" -eq 0 ]]; then
- echo "Test Succeeded!"
-fi
-exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
deleted file mode 100755
index 2fef34f4aba1..000000000000
--- a/tools/testing/selftests/net/toeplitz_client.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# A simple program for generating traffic for the toeplitz test.
-#
-# This program sends packets periodically for, conservatively, 20 seconds. The
-# intent is for the calling program to kill this program once it is no longer
-# needed, rather than waiting for the 20 second expiration.
-
-send_traffic() {
- expiration=$((SECONDS+20))
- while [[ "${SECONDS}" -lt "${expiration}" ]]
- do
- if [[ "${PROTO}" == "-u" ]]; then
- echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
- else
- echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
- fi
- sleep 0.001
- done
-}
-
-PROTO=$1
-IPVER=$2
-ADDR=$3
-PORT=$4
-
-send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index dbb34c7e09ce..a7c6ab8a0347 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -36,6 +36,35 @@ run_cmd()
return $rc
}
+__check_traceroute_version()
+{
+ local cmd=$1; shift
+ local req_ver=$1; shift
+ local ver
+
+ req_ver=$(echo "$req_ver" | sed 's/\.//g')
+ ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g')
+ if [[ $ver -lt $req_ver ]]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
+check_traceroute6_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute6 "$req_ver"
+}
+
+check_traceroute_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute "$req_ver"
+}
+
################################################################################
# create namespaces and interconnects
@@ -59,6 +88,8 @@ create_ns()
ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0
+ ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
@@ -298,6 +329,144 @@ run_traceroute6_vrf()
}
################################################################################
+# traceroute6 with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute6_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_ext()
+{
+ # Start clean
+ cleanup_traceroute6_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 2001:db8:1::1/128 dev lo
+ ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 2001:db8:1::2/128 dev lo
+ ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1
+ ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 2001:db8:1::3/128 dev lo
+ ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1
+}
+
+traceroute6_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+}
+
+run_traceroute6_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute6_version 2.1.5; then
+ log_test_skip "traceroute6 too old, missing ICMP extensions support"
+ return
+ fi
+
+ setup_traceroute6_ext
+
+ RET=0
+
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute6_ext_iio_iif_test
+ traceroute6_ext_iio_iif_test 127
+ traceroute6_ext_iio_iif_test 128
+ traceroute6_ext_iio_iif_test 129
+
+ log_test "IPv6 traceroute with ICMP extensions"
+
+ cleanup_traceroute6_ext
+}
+
+################################################################################
# traceroute test
#
# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
@@ -438,14 +607,157 @@ run_traceroute_vrf()
}
################################################################################
+# traceroute with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_ext()
+{
+ # Start clean
+ cleanup_traceroute_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 192.0.2.1/32 dev lo
+ ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 192.0.2.2/32 dev lo
+ ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1
+ ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 192.0.2.3/32 dev lo
+ ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1
+}
+
+traceroute_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ # Re-add the route towards H1 since it was deleted when we removed the
+ # last IPv4 address from eth1 on R1.
+ run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1"
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+}
+
+run_traceroute_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute_version 2.1.5; then
+ log_test_skip "traceroute too old, missing ICMP extensions support"
+ return
+ fi
+
+ setup_traceroute_ext
+
+ RET=0
+
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute_ext_iio_iif_test
+ traceroute_ext_iio_iif_test 127
+ traceroute_ext_iio_iif_test 128
+ traceroute_ext_iio_iif_test 129
+
+ log_test "IPv4 traceroute with ICMP extensions"
+
+ cleanup_traceroute_ext
+}
+
+################################################################################
# Run tests
run_tests()
{
run_traceroute6
run_traceroute6_vrf
+ run_traceroute6_ext
run_traceroute
run_traceroute_vrf
+ run_traceroute_ext
}
################################################################################
@@ -462,6 +774,7 @@ done
require_command traceroute6
require_command traceroute
+require_command jq
run_tests
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index dae91eb97d69..bcc14688661d 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -217,7 +217,7 @@ static void print_timestamp_usr(void)
static void print_timestamp(struct scm_timestamping *tss, int tstype,
int tskey, int payload_len)
{
- const char *tsname;
+ const char *tsname = NULL;
validate_key(tskey, tstype);
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 0091bcd91c2c..47de27fd4f90 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -1005,5 +1005,33 @@
"teardown": [
"$TC qdisc del dev $DUMMY clsact"
]
+ },
+ {
+ "id": "4366",
+ "name": "CAKE with QFQ Parent - CAKE enqueue with packets dropping",
+ "category": [
+ "qdisc",
+ "cake",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup":[
+ "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 1024",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2: cake memlimit 9",
+ "$TC filter add dev $DUMMY protocol ip parent 1: prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+ "$TC qdisc replace dev $DUMMY parent 1:1 handle 3: netem delay 0ms"
+ ],
+ "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc qfq 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
}
]
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 6b8123c12a7a..f8fa102a627f 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -836,56 +836,70 @@ static int ublk_process_io(struct ublk_thread *t)
return reapped;
}
-static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
- cpu_set_t *cpuset)
-{
- if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
- ublk_err("ublk dev %u thread %u set affinity failed",
- t->dev->dev_info.dev_id, t->idx);
-}
-
struct ublk_thread_info {
struct ublk_dev *dev;
+ pthread_t thread;
unsigned idx;
sem_t *ready;
cpu_set_t *affinity;
unsigned long long extra_flags;
};
-static void *ublk_io_handler_fn(void *data)
+static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
{
- struct ublk_thread_info *info = data;
- struct ublk_thread *t = &info->dev->threads[info->idx];
+ if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0)
+ ublk_err("ublk dev %u thread %u set affinity failed",
+ info->dev->dev_info.dev_id, info->idx);
+}
+
+static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info)
+{
+ struct ublk_thread t = {
+ .dev = info->dev,
+ .idx = info->idx,
+ };
int dev_id = info->dev->dev_info.dev_id;
int ret;
- t->dev = info->dev;
- t->idx = info->idx;
-
- ret = ublk_thread_init(t, info->extra_flags);
+ ret = ublk_thread_init(&t, info->extra_flags);
if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
- dev_id, t->idx);
- return NULL;
+ dev_id, t.idx);
+ return ret;
}
- /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
- if (info->affinity)
- ublk_thread_set_sched_affinity(t, info->affinity);
sem_post(info->ready);
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
- gettid(), dev_id, t->idx);
+ gettid(), dev_id, t.idx);
/* submit all io commands to ublk driver */
- ublk_submit_fetch_commands(t);
+ ublk_submit_fetch_commands(&t);
do {
- if (ublk_process_io(t) < 0)
+ if (ublk_process_io(&t) < 0)
break;
} while (1);
ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
- gettid(), dev_id, t->idx);
- ublk_thread_deinit(t);
+ gettid(), dev_id, t.idx);
+ ublk_thread_deinit(&t);
+ return 0;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+ struct ublk_thread_info *info = data;
+
+ /*
+ * IO perf is sensitive with queue pthread affinity on NUMA machine
+ *
+ * Set sched_affinity at beginning, so following allocated memory/pages
+ * could be CPU/NUMA aware.
+ */
+ if (info->affinity)
+ ublk_thread_set_sched_affinity(info);
+
+ __ublk_io_handler_fn(info);
+
return NULL;
}
@@ -983,14 +997,13 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
*/
if (dev->nthreads == dinfo->nr_hw_queues)
tinfo[i].affinity = &affinity_buf[i];
- pthread_create(&dev->threads[i].thread, NULL,
+ pthread_create(&tinfo[i].thread, NULL,
ublk_io_handler_fn,
&tinfo[i]);
}
for (i = 0; i < dev->nthreads; i++)
sem_wait(&ready);
- free(tinfo);
free(affinity_buf);
/* everything is fine now, start us */
@@ -1013,7 +1026,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
/* wait until we are terminated */
for (i = 0; i < dev->nthreads; i++)
- pthread_join(dev->threads[i].thread, &thread_ret);
+ pthread_join(tinfo[i].thread, &thread_ret);
+ free(tinfo);
fail:
for (i = 0; i < dinfo->nr_hw_queues; i++)
ublk_queue_deinit(&dev->q[i]);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 5e55484fb0aa..fe42705c6d42 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -175,23 +175,20 @@ struct ublk_queue {
struct ublk_thread {
struct ublk_dev *dev;
- struct io_uring ring;
- unsigned int cmd_inflight;
- unsigned int io_inflight;
-
- pthread_t thread;
unsigned idx;
#define UBLKS_T_STOPPING (1U << 0)
#define UBLKS_T_IDLE (1U << 1)
unsigned state;
+ unsigned int cmd_inflight;
+ unsigned int io_inflight;
+ struct io_uring ring;
};
struct ublk_dev {
struct ublk_tgt tgt;
struct ublksrv_ctrl_dev_info dev_info;
struct ublk_queue q[UBLK_MAX_QUEUES];
- struct ublk_thread threads[UBLK_MAX_THREADS];
unsigned nthreads;
unsigned per_io_tasks;
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index 8ceeb8a7894f..c7b270dd77a9 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -7,6 +7,8 @@
# * virtme-ng
# * busybox-static (used by virtme-ng)
# * qemu (used by virtme-ng)
+#
+# shellcheck disable=SC2317,SC2119
readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
@@ -22,8 +24,9 @@ readonly SSH_HOST_PORT=2222
readonly VSOCK_CID=1234
readonly WAIT_PERIOD=3
readonly WAIT_PERIOD_MAX=60
-readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
-readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
+readonly WAIT_QEMU=5
+readonly PIDFILE_TEMPLATE=/tmp/vsock_vmtest_XXXX.pid
+declare -A PIDFILES
# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a
# control port forwarded for vsock_test. Because virtme-ng doesn't support
@@ -33,12 +36,6 @@ readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
# add the kernel cmdline options that virtme-init uses to setup the interface.
readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}"
readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}"
-readonly QEMU_OPTS="\
- -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
- -device virtio-net-pci,netdev=n0 \
- -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
- --pidfile ${QEMU_PIDFILE} \
-"
readonly KERNEL_CMDLINE="\
virtme.dhcp net.ifnames=0 biosdevname=0 \
virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
@@ -51,6 +48,8 @@ readonly TEST_DESCS=(
"Run vsock_test using the loopback transport in the VM."
)
+readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback)
+
VERBOSE=0
usage() {
@@ -84,21 +83,33 @@ die() {
exit "${KSFT_FAIL}"
}
+check_result() {
+ local rc arg
+
+ rc=$1
+ arg=$2
+
+ cnt_total=$(( cnt_total + 1 ))
+
+ if [[ ${rc} -eq ${KSFT_PASS} ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq ${KSFT_SKIP} ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq ${KSFT_FAIL} ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=${rc}"
+ fi
+}
+
vm_ssh() {
ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
return $?
}
cleanup() {
- if [[ -s "${QEMU_PIDFILE}" ]]; then
- pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
- fi
-
- # If failure occurred during or before qemu start up, then we need
- # to clean this up ourselves.
- if [[ -e "${QEMU_PIDFILE}" ]]; then
- rm "${QEMU_PIDFILE}"
- fi
+ terminate_pidfiles "${!PIDFILES[@]}"
}
check_args() {
@@ -147,7 +158,7 @@ check_vng() {
local version
local ok
- tested_versions=("1.33" "1.36")
+ tested_versions=("1.33" "1.36" "1.37")
version="$(vng --version)"
ok=0
@@ -188,10 +199,37 @@ handle_build() {
popd &>/dev/null
}
+create_pidfile() {
+ local pidfile
+
+ pidfile=$(mktemp "${PIDFILE_TEMPLATE}")
+ PIDFILES["${pidfile}"]=1
+
+ echo "${pidfile}"
+}
+
+terminate_pidfiles() {
+ local pidfile
+
+ for pidfile in "$@"; do
+ if [[ -s "${pidfile}" ]]; then
+ pkill -SIGTERM -F "${pidfile}" > /dev/null 2>&1
+ fi
+
+ if [[ -e "${pidfile}" ]]; then
+ rm -f "${pidfile}"
+ fi
+
+ unset "PIDFILES[${pidfile}]"
+ done
+}
+
vm_start() {
+ local pidfile=$1
local logfile=/dev/null
local verbose_opt=""
local kernel_opt=""
+ local qemu_opts=""
local qemu
qemu=$(command -v "${QEMU}")
@@ -201,6 +239,13 @@ vm_start() {
logfile=/dev/stdout
fi
+ qemu_opts="\
+ -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
+ -device virtio-net-pci,netdev=n0 \
+ -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
+ --pidfile ${pidfile}
+ "
+
if [[ "${BUILD}" -eq 1 ]]; then
kernel_opt="${KERNEL_CHECKOUT}"
fi
@@ -209,16 +254,14 @@ vm_start() {
--run \
${kernel_opt} \
${verbose_opt} \
- --qemu-opts="${QEMU_OPTS}" \
+ --qemu-opts="${qemu_opts}" \
--qemu="${qemu}" \
--user root \
--append "${KERNEL_CMDLINE}" \
--rw &> ${logfile} &
- if ! timeout ${WAIT_TOTAL} \
- bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then
- die "failed to boot VM"
- fi
+ timeout "${WAIT_QEMU}" \
+ bash -c 'while [[ ! -s '"${pidfile}"' ]]; do sleep 1; done; exit 0'
}
vm_wait_for_ssh() {
@@ -251,9 +294,11 @@ wait_for_listener()
# for tcp protocol additionally check the socket state
[ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+
for i in $(seq "${max_intervals}"); do
- if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \
- grep -q "${pattern}"; then
+ if awk -v pattern="${pattern}" \
+ 'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \
+ /proc/net/"${protocol}"*; then
break
fi
sleep "${interval}"
@@ -270,113 +315,196 @@ EOF
}
host_wait_for_listener() {
- wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+ local port=$1
+
+ wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
}
-__log_stdin() {
- cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+vm_vsock_test() {
+ local host=$1
+ local cid=$2
+ local port=$3
+ local rc
+
+ # log output and use pipefail to respect vsock_test errors
+ set -o pipefail
+ if [[ "${host}" != server ]]; then
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host="${host}" \
+ --peer-cid="${cid}" \
+ --control-port="${port}" \
+ 2>&1 | log_guest
+ rc=$?
+ else
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --peer-cid="${cid}" \
+ --control-port="${port}" \
+ 2>&1 | log_guest &
+ rc=$?
+
+ if [[ $rc -ne 0 ]]; then
+ set +o pipefail
+ return $rc
+ fi
+
+ vm_wait_for_listener "${port}"
+ rc=$?
+ fi
+ set +o pipefail
+
+ return $rc
}
-__log_args() {
- echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+host_vsock_test() {
+ local host=$1
+ local cid=$2
+ local port=$3
+ local rc
+
+ # log output and use pipefail to respect vsock_test errors
+ set -o pipefail
+ if [[ "${host}" != server ]]; then
+ ${VSOCK_TEST} \
+ --mode=client \
+ --peer-cid="${cid}" \
+ --control-host="${host}" \
+ --control-port="${port}" 2>&1 | log_host
+ rc=$?
+ else
+ ${VSOCK_TEST} \
+ --mode=server \
+ --peer-cid="${cid}" \
+ --control-port="${port}" 2>&1 | log_host &
+ rc=$?
+
+ if [[ $rc -ne 0 ]]; then
+ set +o pipefail
+ return $rc
+ fi
+
+ host_wait_for_listener "${port}"
+ rc=$?
+ fi
+ set +o pipefail
+
+ return $rc
}
log() {
- local prefix="$1"
+ local redirect
+ local prefix
- shift
- local redirect=
if [[ ${VERBOSE} -eq 0 ]]; then
redirect=/dev/null
else
redirect=/dev/stdout
fi
+ prefix="${LOG_PREFIX:-}"
+
if [[ "$#" -eq 0 ]]; then
- __log_stdin | tee -a "${LOG}" > ${redirect}
+ if [[ -n "${prefix}" ]]; then
+ awk -v prefix="${prefix}" '{printf "%s: %s\n", prefix, $0}'
+ else
+ cat
+ fi
else
- __log_args "$@" | tee -a "${LOG}" > ${redirect}
- fi
-}
-
-log_setup() {
- log "setup" "$@"
+ if [[ -n "${prefix}" ]]; then
+ echo "${prefix}: " "$@"
+ else
+ echo "$@"
+ fi
+ fi | tee -a "${LOG}" > "${redirect}"
}
log_host() {
- local testname=$1
-
- shift
- log "test:${testname}:host" "$@"
+ LOG_PREFIX=host log "$@"
}
log_guest() {
- local testname=$1
-
- shift
- log "test:${testname}:guest" "$@"
+ LOG_PREFIX=guest log "$@"
}
test_vm_server_host_client() {
- local testname="${FUNCNAME[0]#test_}"
+ if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then
+ return "${KSFT_FAIL}"
+ fi
- vm_ssh -- "${VSOCK_TEST}" \
- --mode=server \
- --control-port="${TEST_GUEST_PORT}" \
- --peer-cid=2 \
- 2>&1 | log_guest "${testname}" &
+ if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
+ return "${KSFT_FAIL}"
+ fi
- vm_wait_for_listener "${TEST_GUEST_PORT}"
+ return "${KSFT_PASS}"
+}
- ${VSOCK_TEST} \
- --mode=client \
- --control-host=127.0.0.1 \
- --peer-cid="${VSOCK_CID}" \
- --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}"
+test_vm_client_host_server() {
+ if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
+ return "${KSFT_FAIL}"
+ fi
- return $?
+ if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
}
-test_vm_client_host_server() {
- local testname="${FUNCNAME[0]#test_}"
+test_vm_loopback() {
+ local port=60000 # non-forwarded local port
- ${VSOCK_TEST} \
- --mode "server" \
- --control-port "${TEST_HOST_PORT_LISTENER}" \
- --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" &
+ vm_ssh -- modprobe vsock_loopback &> /dev/null || :
- host_wait_for_listener
+ if ! vm_vsock_test "server" 1 "${port}"; then
+ return "${KSFT_FAIL}"
+ fi
- vm_ssh -- "${VSOCK_TEST}" \
- --mode=client \
- --control-host=10.0.2.2 \
- --peer-cid=2 \
- --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}"
+ if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then
+ return "${KSFT_FAIL}"
+ fi
- return $?
+ return "${KSFT_PASS}"
}
-test_vm_loopback() {
- local testname="${FUNCNAME[0]#test_}"
- local port=60000 # non-forwarded local port
+shared_vm_test() {
+ local tname
+
+ tname="${1}"
+
+ for testname in "${USE_SHARED_VM[@]}"; do
+ if [[ "${tname}" == "${testname}" ]]; then
+ return 0
+ fi
+ done
- vm_ssh -- "${VSOCK_TEST}" \
- --mode=server \
- --control-port="${port}" \
- --peer-cid=1 2>&1 | log_guest "${testname}" &
+ return 1
+}
- vm_wait_for_listener "${port}"
+shared_vm_tests_requested() {
+ for arg in "$@"; do
+ if shared_vm_test "${arg}"; then
+ return 0
+ fi
+ done
- vm_ssh -- "${VSOCK_TEST}" \
- --mode=client \
- --control-host="127.0.0.1" \
- --control-port="${port}" \
- --peer-cid=1 2>&1 | log_guest "${testname}"
+ return 1
+}
- return $?
+run_shared_vm_tests() {
+ local arg
+
+ for arg in "$@"; do
+ if ! shared_vm_test "${arg}"; then
+ continue
+ fi
+
+ run_shared_vm_test "${arg}"
+ check_result "$?" "${arg}"
+ done
}
-run_test() {
+run_shared_vm_test() {
local host_oops_cnt_before
local host_warn_cnt_before
local vm_oops_cnt_before
@@ -399,31 +527,32 @@ run_test() {
host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l)
if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then
- echo "FAIL: kernel oops detected on host" | log_host "${name}"
+ echo "FAIL: kernel oops detected on host" | log_host
rc=$KSFT_FAIL
fi
host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock')
if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
- echo "FAIL: kernel warning detected on host" | log_host "${name}"
+ echo "FAIL: kernel warning detected on host" | log_host
rc=$KSFT_FAIL
fi
vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
- echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ echo "FAIL: kernel oops detected on vm" | log_host
rc=$KSFT_FAIL
fi
vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
- echo "FAIL: kernel warning detected on vm" | log_host "${name}"
+ echo "FAIL: kernel warning detected on vm" | log_host
rc=$KSFT_FAIL
fi
return "${rc}"
}
+BUILD=0
QEMU="qemu-system-$(uname -m)"
while getopts :hvsq:b o
@@ -452,30 +581,21 @@ handle_build
echo "1..${#ARGS[@]}"
-log_setup "Booting up VM"
-vm_start
-vm_wait_for_ssh
-log_setup "VM booted up"
-
cnt_pass=0
cnt_fail=0
cnt_skip=0
cnt_total=0
-for arg in "${ARGS[@]}"; do
- run_test "${arg}"
- rc=$?
- if [[ ${rc} -eq $KSFT_PASS ]]; then
- cnt_pass=$(( cnt_pass + 1 ))
- echo "ok ${cnt_total} ${arg}"
- elif [[ ${rc} -eq $KSFT_SKIP ]]; then
- cnt_skip=$(( cnt_skip + 1 ))
- echo "ok ${cnt_total} ${arg} # SKIP"
- elif [[ ${rc} -eq $KSFT_FAIL ]]; then
- cnt_fail=$(( cnt_fail + 1 ))
- echo "not ok ${cnt_total} ${arg} # exit=$rc"
- fi
- cnt_total=$(( cnt_total + 1 ))
-done
+
+if shared_vm_tests_requested "${ARGS[@]}"; then
+ log_host "Booting up VM"
+ pidfile="$(create_pidfile)"
+ vm_start "${pidfile}"
+ vm_wait_for_ssh
+ log_host "VM booted up"
+
+ run_shared_vm_tests "${ARGS[@]}"
+ terminate_pidfiles "${pidfile}"
+fi
echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
echo "Log: ${LOG}"