Diffstat (limited to 'tools')
115 files changed, 7142 insertions, 1506 deletions
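Among the build changes below, tools/net/ynl/Makefile gains run_tests, lint and schema_check targets plus a new tests/ directory. A rough usage sketch, assuming the paths as they appear in this series (kernel options needed by the tests are listed in tools/net/ynl/tests/config):

    $ make -C tools/net/ynl run_tests      # run the new KTAP shell tests
    $ make -C tools/net/ynl lint           # yamllint the netlink specs
    $ make -C tools/net/ynl schema_check   # validate each spec via cli.py --validate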
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f5713f59ac10..be7d8e060e10 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7205,6 +7205,7 @@ enum { TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ }; enum { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 48eb49aa03d4..e0b579a1df4f 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NETDEV_H #define _UAPI_LINUX_NETDEV_H @@ -80,6 +81,7 @@ enum netdev_qstats_scope { enum netdev_napi_threaded { NETDEV_NAPI_THREADED_DISABLED, NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, }; enum { diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 211df5a93ad9..7736b492f559 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -12,10 +12,13 @@ endif libdir ?= $(prefix)/$(libdir_relative) includedir ?= $(prefix)/include -SUBDIRS = lib generated samples +SPECDIR=../../../Documentation/netlink/specs + +SUBDIRS = lib generated samples ynltool tests all: $(SUBDIRS) libynl.a +ynltool: | lib generated libynl.a samples: | lib generated libynl.a: | lib generated @echo -e "\tAR $@" @@ -48,5 +51,27 @@ install: libynl.a lib/*.h @echo -e "\tINSTALL pyynl" @pip install --prefix=$(DESTDIR)$(prefix) . @make -C generated install + @make -C tests install + +run_tests: + @$(MAKE) -C tests run_tests + +lint: + yamllint $(SPECDIR) + +schema_check: + @N=1; \ + for spec in $(SPECDIR)/*.yaml ; do \ + NAME=$$(basename $$spec) ; \ + OUTPUT=$$(./pyynl/cli.py --spec $$spec --validate) ; \ + if [ $$? 
-eq 0 ] ; then \ + echo "ok $$N $$NAME schema validation" ; \ + else \ + echo "not ok $$N $$NAME schema validation" ; \ + echo "$$OUTPUT" ; \ + echo ; \ + fi ; \ + N=$$((N+1)) ; \ + done -.PHONY: all clean distclean install $(SUBDIRS) +.PHONY: all clean distclean install run_tests lint schema_check $(SUBDIRS) diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index 8c192e900bd3..af02a5b7e5a2 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -7,9 +7,10 @@ import os import pathlib import pprint import sys +import textwrap sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) -from lib import YnlFamily, Netlink, NlError +from lib import YnlFamily, Netlink, NlError, SpecFamily sys_schema_dir='/usr/share/ynl' relative_schema_dir='../../../../Documentation/netlink' @@ -39,6 +40,60 @@ class YnlEncoder(json.JSONEncoder): return json.JSONEncoder.default(self, obj) +def print_attr_list(ynl, attr_names, attr_set, indent=2): + """Print a list of attributes with their types and documentation.""" + prefix = ' ' * indent + for attr_name in attr_names: + if attr_name in attr_set.attrs: + attr = attr_set.attrs[attr_name] + attr_info = f'{prefix}- {attr_name}: {attr.type}' + if 'enum' in attr.yaml: + enum_name = attr.yaml['enum'] + attr_info += f" (enum: {enum_name})" + # Print enum values if available + if enum_name in ynl.consts: + const = ynl.consts[enum_name] + enum_values = list(const.entries.keys()) + attr_info += f"\n{prefix} {const.type.capitalize()}: {', '.join(enum_values)}" + + # Show nested attributes reference and recursively display them + nested_set_name = None + if attr.type == 'nest' and 'nested-attributes' in attr.yaml: + nested_set_name = attr.yaml['nested-attributes'] + attr_info += f" -> {nested_set_name}" + + if attr.yaml.get('doc'): + doc_text = textwrap.indent(attr.yaml['doc'], prefix + ' ') + attr_info += f"\n{doc_text}" + print(attr_info) + + # Recursively show nested attributes + if nested_set_name in ynl.attr_sets: + nested_set = ynl.attr_sets[nested_set_name] + # Filter out 'unspec' and other unused attrs + nested_names = [n for n in nested_set.attrs.keys() + if nested_set.attrs[n].type != 'unused'] + if nested_names: + print_attr_list(ynl, nested_names, nested_set, indent + 4) + + +def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True): + """Print a given mode (do/dump/event/notify).""" + mode_title = mode.capitalize() + + if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']: + print(f'\n{mode_title} request attributes:') + print_attr_list(ynl, mode_spec['request']['attributes'], attr_set) + + if 'reply' in mode_spec and 'attributes' in mode_spec['reply']: + print(f'\n{mode_title} reply attributes:') + print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set) + + if 'attributes' in mode_spec: + print(f'\n{mode_title} attributes:') + print_attr_list(ynl, mode_spec['attributes'], attr_set) + + def main(): description = """ YNL CLI utility - a general purpose netlink utility that uses YAML @@ -70,6 +125,9 @@ def main(): group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str) group.add_argument('--list-ops', action='store_true') group.add_argument('--list-msgs', action='store_true') + group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str, + help='List attributes for an operation') + group.add_argument('--validate', action='store_true') parser.add_argument('--duration', dest='duration', type=int, help='when subscribed, 
watch for DURATION seconds') @@ -111,15 +169,25 @@ def main(): if args.family: spec = f"{spec_dir()}/{args.family}.yaml" - if args.schema is None and spec.startswith(sys_schema_dir): - args.schema = '' # disable schema validation when installed - if args.process_unknown is None: - args.process_unknown = True else: spec = args.spec if not os.path.isfile(spec): raise Exception(f"Spec file {spec} does not exist") + if args.validate: + try: + SpecFamily(spec, args.schema) + except Exception as error: + print(error) + exit(1) + return + + if args.family: # set behaviour when using installed specs + if args.schema is None and spec.startswith(sys_schema_dir): + args.schema = '' # disable schema validation when installed + if args.process_unknown is None: + args.process_unknown = True + ynl = YnlFamily(spec, args.schema, args.process_unknown, recv_size=args.dbg_small_recv) if args.dbg_small_recv: @@ -135,6 +203,28 @@ def main(): for op_name, op in ynl.msgs.items(): print(op_name, " [", ", ".join(op.modes), "]") + if args.list_attrs: + op = ynl.msgs.get(args.list_attrs) + if not op: + print(f'Operation {args.list_attrs} not found') + exit(1) + + print(f'Operation: {op.name}') + print(op.yaml['doc']) + + for mode in ['do', 'dump', 'event']: + if mode in op.yaml: + print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True) + + if 'notify' in op.yaml: + mode_spec = op.yaml['notify'] + ref_spec = ynl.msgs.get(mode_spec).yaml.get('do') + if ref_spec: + print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False) + + if 'mcgrp' in op.yaml: + print(f"\nMulticast group: {op.yaml['mcgrp']}") + try: if args.do: reply = ynl.do(args.do, attrs, args.flags) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 62383c70ebb9..36d36eb7e3b8 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -100,12 +100,21 @@ class Netlink: 'bitfield32', 'sint', 'uint']) class NlError(Exception): - def __init__(self, nl_msg): - self.nl_msg = nl_msg - self.error = -nl_msg.error - - def __str__(self): - return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}" + def __init__(self, nl_msg): + self.nl_msg = nl_msg + self.error = -nl_msg.error + + def __str__(self): + msg = "Netlink error: " + + extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {} + if 'msg' in extack: + msg += extack['msg'] + ': ' + del extack['msg'] + msg += os.strerror(self.error) + if extack: + msg += ' ' + str(extack) + return msg class ConfigError(Exception): @@ -976,6 +985,15 @@ class YnlFamily(SpecFamily): raw = bytes.fromhex(string) else: raw = int(string, 16) + elif attr_spec.display_hint == 'mac': + # Parse MAC address in format "00:11:22:33:44:55" or "001122334455" + if ':' in string: + mac_bytes = [int(x, 16) for x in string.split(':')] + else: + if len(string) % 2 != 0: + raise Exception(f"Invalid MAC address format: {string}") + mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)] + raw = bytes(mac_bytes) else: raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented" f" when parsing '{attr_spec['name']}'") @@ -1039,15 +1057,15 @@ class YnlFamily(SpecFamily): self.check_ntf() def operation_do_attributes(self, name): - """ - For a given operation name, find and return a supported - set of attributes (as a dict). - """ - op = self.find_operation(name) - if not op: - return None - - return op['do']['request']['attributes'].copy() + """ + For a given operation name, find and return a supported + set of attributes (as a dict). 
+ """ + op = self.find_operation(name) + if not op: + return None + + return op['do']['request']['attributes'].copy() def _encode_message(self, op, vals, flags, req_seq): nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index aadeb3abcad8..b517d0c605ad 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1205,7 +1205,7 @@ class SubMessage(SpecSubMessage): class Family(SpecFamily): - def __init__(self, file_name, exclude_ops): + def __init__(self, file_name, exclude_ops, fn_prefix): # Added by resolve: self.c_name = None delattr(self, "c_name") @@ -1237,6 +1237,8 @@ class Family(SpecFamily): else: self.uapi_header_name = self.ident_name + self.fn_prefix = fn_prefix if fn_prefix else f'{self.ident_name}-nl' + def resolve(self): self.resolve_up(super()) @@ -2911,12 +2913,12 @@ def print_kernel_op_table_fwd(family, cw, terminate): continue if 'do' in op: - name = c_lower(f"{family.ident_name}-nl-{op_name}-doit") + name = c_lower(f"{family.fn_prefix}-{op_name}-doit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct genl_info *info'], suffix=';') if 'dump' in op: - name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit") + name = c_lower(f"{family.fn_prefix}-{op_name}-dumpit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';') cw.nl() @@ -2942,7 +2944,7 @@ def print_kernel_op_table(family, cw): for x in op['dont-validate']])), ) for op_mode in ['do', 'dump']: if op_mode in op: - name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it") members.append((op_mode + 'it', name)) if family.kernel_policy == 'per-op': struct = Struct(family, op['attribute-set'], @@ -2980,7 +2982,7 @@ def print_kernel_op_table(family, cw): members.append(('validate', ' | '.join([c_upper('genl-dont-validate-' + x) for x in dont_validate])), ) - name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it") if 'pre' in op[op_mode]: members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre']))) members.append((op_mode + 'it', name)) @@ -3402,6 +3404,7 @@ def main(): help='Do not overwrite the output file if the new output is identical to the old') parser.add_argument('--exclude-op', action='append', default=[]) parser.add_argument('-o', dest='out_file', type=str, default=None) + parser.add_argument('--function-prefix', dest='fn_prefix', type=str) args = parser.parse_args() if args.header is None: @@ -3410,7 +3413,7 @@ def main(): exclude_ops = [re.compile(expr) for expr in args.exclude_op] try: - parsed = Family(args.spec, exclude_ops) + parsed = Family(args.spec, exclude_ops, args.fn_prefix) if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)': print('Spec license:', parsed.license) print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)') @@ -3430,11 +3433,16 @@ def main(): cw.p("/* Do not edit directly, auto-generated from: */") cw.p(f"/*\t{spec_kernel} */") cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */") - if args.exclude_op or args.user_header: + if args.exclude_op or args.user_header or args.fn_prefix: line = '' - line += ' --user-header '.join([''] + args.user_header) - line += ' --exclude-op '.join([''] + args.exclude_op) + if args.user_header: + line += ' --user-header '.join([''] + args.user_header) + if args.exclude_op: + 
line += ' --exclude-op '.join([''] + args.exclude_op) + if args.fn_prefix: + line += f' --function-prefix {args.fn_prefix}' cw.p(f'/* YNL-ARG{line} */') + cw.p('/* To regenerate run: tools/net/ynl/ynl-regen.sh */') cw.nl() if args.mode == 'uapi': diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index 7f5fca7682d7..05087ee323ba 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -7,3 +7,4 @@ rt-addr rt-link rt-route tc +tc-filter-add diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index c9494a564da4..d76cbd41cbb1 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -19,6 +19,7 @@ include $(wildcard *.d) all: $(BINS) CFLAGS_page-pool=$(CFLAGS_netdev) +CFLAGS_tc-filter-add:=$(CFLAGS_tc) $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) @echo -e '\tCC sample $@' diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c deleted file mode 100644 index e5d521320fbf..000000000000 --- a/tools/net/ynl/samples/page-pool.c +++ /dev/null @@ -1,149 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - -#include <stdio.h> -#include <string.h> - -#include <ynl.h> - -#include <net/if.h> - -#include "netdev-user.h" - -struct stat { - unsigned int ifc; - - struct { - unsigned int cnt; - size_t refs, bytes; - } live[2]; - - size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; -}; - -struct stats_array { - unsigned int i, max; - struct stat *s; -}; - -static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex) -{ - unsigned int i; - - for (i = 0; i < a->i; i++) { - if (a->s[i].ifc == ifindex) - return &a->s[i]; - } - - a->i++; - if (a->i == a->max) { - a->max *= 2; - a->s = reallocarray(a->s, a->max, sizeof(*a->s)); - } - a->s[i].ifc = ifindex; - return &a->s[i]; -} - -static void count(struct stat *s, unsigned int l, - struct netdev_page_pool_get_rsp *pp) -{ - s->live[l].cnt++; - if (pp->_present.inflight) - s->live[l].refs += pp->inflight; - if (pp->_present.inflight_mem) - s->live[l].bytes += pp->inflight_mem; -} - -int main(int argc, char **argv) -{ - struct netdev_page_pool_stats_get_list *pp_stats; - struct netdev_page_pool_get_list *pools; - struct stats_array a = {}; - struct ynl_error yerr; - struct ynl_sock *ys; - - ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return 1; - } - - a.max = 128; - a.s = calloc(a.max, sizeof(*a.s)); - if (!a.s) - goto err_close; - - pools = netdev_page_pool_get_dump(ys); - if (!pools) - goto err_free; - - ynl_dump_foreach(pools, pp) { - struct stat *s = find_ifc(&a, pp->ifindex); - - count(s, 1, pp); - if (pp->_present.detach_time) - count(s, 0, pp); - } - netdev_page_pool_get_list_free(pools); - - pp_stats = netdev_page_pool_stats_get_dump(ys); - if (!pp_stats) - goto err_free; - - ynl_dump_foreach(pp_stats, pp) { - struct stat *s = find_ifc(&a, pp->info.ifindex); - - if (pp->_present.alloc_fast) - s->alloc_fast += pp->alloc_fast; - if (pp->_present.alloc_refill) - s->alloc_fast += pp->alloc_refill; - if (pp->_present.alloc_slow) - s->alloc_slow += pp->alloc_slow; - if (pp->_present.recycle_ring) - s->recycle_ring += pp->recycle_ring; - if (pp->_present.recycle_cached) - s->recycle_cache += pp->recycle_cached; - } - netdev_page_pool_stats_get_list_free(pp_stats); - - for (unsigned int i = 0; i < a.i; i++) { - char ifname[IF_NAMESIZE]; - struct stat *s = &a.s[i]; - const char *name; - double recycle; - - if (!s->ifc) { - name 
= "<orphan>\t"; - } else { - name = if_indextoname(s->ifc, ifname); - if (name) - printf("%8s", name); - printf("[%u]\t", s->ifc); - } - - printf("page pools: %u (zombies: %u)\n", - s->live[1].cnt, s->live[0].cnt); - printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", - s->live[1].refs, s->live[1].bytes, - s->live[0].refs, s->live[0].bytes); - - /* We don't know how many pages are sitting in cache and ring - * so we will under-count the recycling rate a bit. - */ - recycle = (double)(s->recycle_ring + s->recycle_cache) / - (s->alloc_fast + s->alloc_slow) * 100; - printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n", - recycle, s->alloc_slow, s->alloc_fast, - s->recycle_ring, s->recycle_cache); - } - - ynl_sock_destroy(ys); - return 0; - -err_free: - free(a.s); -err_close: - fprintf(stderr, "YNL: %s\n", ys->err.msg); - ynl_sock_destroy(ys); - return 2; -} diff --git a/tools/net/ynl/samples/tc-filter-add.c b/tools/net/ynl/samples/tc-filter-add.c new file mode 100644 index 000000000000..97871e9e9edc --- /dev/null +++ b/tools/net/ynl/samples/tc-filter-add.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <arpa/inet.h> +#include <linux/pkt_sched.h> +#include <linux/tc_act/tc_vlan.h> +#include <linux/tc_act/tc_gact.h> +#include <linux/if_ether.h> +#include <net/if.h> + +#include <ynl.h> + +#include "tc-user.h" + +#define TC_HANDLE (0xFFFF << 16) + +const char *vlan_act_name(struct tc_vlan *p) +{ + switch (p->v_action) { + case TCA_VLAN_ACT_POP: + return "pop"; + case TCA_VLAN_ACT_PUSH: + return "push"; + case TCA_VLAN_ACT_MODIFY: + return "modify"; + default: + break; + } + + return "not supported"; +} + +const char *gact_act_name(struct tc_gact *p) +{ + switch (p->action) { + case TC_ACT_SHOT: + return "drop"; + case TC_ACT_OK: + return "ok"; + case TC_ACT_PIPE: + return "pipe"; + default: + break; + } + + return "not supported"; +} + +static void print_vlan(struct tc_act_vlan_attrs *vlan) +{ + printf("%s ", vlan_act_name(vlan->parms)); + if (vlan->_present.push_vlan_id) + printf("id %u ", vlan->push_vlan_id); + if (vlan->_present.push_vlan_protocol) + printf("protocol %#x ", ntohs(vlan->push_vlan_protocol)); + if (vlan->_present.push_vlan_priority) + printf("priority %u ", vlan->push_vlan_priority); +} + +static void print_gact(struct tc_act_gact_attrs *gact) +{ + struct tc_gact *p = gact->parms; + + printf("%s ", gact_act_name(p)); +} + +static void flower_print(struct tc_flower_attrs *flower, const char *kind) +{ + struct tc_act_attrs *a; + unsigned int i; + + printf("%s:\n", kind); + + if (flower->_present.key_vlan_id) + printf(" vlan_id: %u\n", flower->key_vlan_id); + if (flower->_present.key_vlan_prio) + printf(" vlan_prio: %u\n", flower->key_vlan_prio); + if (flower->_present.key_num_of_vlans) + printf(" num_of_vlans: %u\n", flower->key_num_of_vlans); + + for (i = 0; i < flower->_count.act; i++) { + a = &flower->act[i]; + printf("action order: %i %s ", i + 1, a->kind); + if (a->options._present.vlan) + print_vlan(&a->options.vlan); + else if (a->options._present.gact) + print_gact(&a->options.gact); + printf("\n"); + } + printf("\n"); +} + +static void tc_filter_print(struct tc_gettfilter_rsp *f) +{ + struct tc_options_msg *opt = &f->options; + + if (opt->_present.flower) + flower_print(&opt->flower, f->kind); + else if (f->_len.kind) + printf("%s pref %u proto: %#x\n", f->kind, + (f->_hdr.tcm_info >> 16), + ntohs(TC_H_MIN(f->_hdr.tcm_info))); +} + +static int tc_filter_add(struct 
ynl_sock *ys, int ifi) +{ + struct tc_newtfilter_req *req; + struct tc_act_attrs *acts; + struct tc_vlan p = { + .action = TC_ACT_PIPE, + .v_action = TCA_VLAN_ACT_PUSH + }; + __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; + int ret; + + req = tc_newtfilter_req_alloc(); + if (!req) { + fprintf(stderr, "tc_newtfilter_req_alloc failed\n"); + return -1; + } + memset(req, 0, sizeof(*req)); + + acts = tc_act_attrs_alloc(3); + if (!acts) { + fprintf(stderr, "tc_act_attrs_alloc\n"); + tc_newtfilter_req_free(req); + return -1; + } + memset(acts, 0, sizeof(*acts) * 3); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); + req->chain = 0; + + tc_newtfilter_req_set_nlflags(req, flags); + tc_newtfilter_req_set_kind(req, "flower"); + tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100); + tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5); + tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3); + + __tc_newtfilter_req_set_options_flower_act(req, acts, 3); + + /* Skip action at index 0 because in TC, the action array + * index starts at 1, with each index defining the action's + * order. In contrast, in YNL indexed arrays start at index 0. + */ + tc_act_attrs_set_kind(&acts[1], "vlan"); + tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p)); + tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200); + tc_act_attrs_set_kind(&acts[2], "vlan"); + tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p)); + tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300); + + tc_newtfilter_req_set_options_flower_flags(req, 0); + tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100)); + + ret = tc_newtfilter(ys, req); + if (ret) + fprintf(stderr, "tc_newtfilter: %s\n", ys->err.msg); + + tc_newtfilter_req_free(req); + + return ret; +} + +static int tc_filter_show(struct ynl_sock *ys, int ifi) +{ + struct tc_gettfilter_req_dump *req; + struct tc_gettfilter_list *rsp; + + req = tc_gettfilter_req_dump_alloc(); + if (!req) { + fprintf(stderr, "tc_gettfilter_req_dump_alloc failed\n"); + return -1; + } + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + req->_present.chain = 1; + req->chain = 0; + + rsp = tc_gettfilter_dump(ys, req); + tc_gettfilter_req_dump_free(req); + if (!rsp) { + fprintf(stderr, "YNL: %s\n", ys->err.msg); + return -1; + } + + if (ynl_dump_empty(rsp)) + fprintf(stderr, "Error: no filters reported\n"); + else + ynl_dump_foreach(rsp, flt) tc_filter_print(flt); + + tc_gettfilter_list_free(rsp); + + return 0; +} + +static int tc_filter_del(struct ynl_sock *ys, int ifi) +{ + struct tc_deltfilter_req *req; + __u16 flags = NLM_F_REQUEST; + int ret; + + req = tc_deltfilter_req_alloc(); + if (!req) { + fprintf(stderr, "tc_deltfilter_req_alloc failed\n"); + return -1; + } + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q)); + tc_deltfilter_req_set_nlflags(req, flags); + + ret = tc_deltfilter(ys, req); + if (ret) + fprintf(stderr, "tc_deltfilter failed: %s\n", ys->err.msg); + + tc_deltfilter_req_free(req); + + return ret; +} + +static int tc_clsact_add(struct ynl_sock *ys, int ifi) +{ + struct tc_newqdisc_req *req; + __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE; + int ret; + + req = tc_newqdisc_req_alloc(); + if 
(!req) { + fprintf(stderr, "tc_newqdisc_req_alloc failed\n"); + return -1; + } + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_CLSACT; + req->_hdr.tcm_handle = TC_HANDLE; + tc_newqdisc_req_set_nlflags(req, flags); + tc_newqdisc_req_set_kind(req, "clsact"); + + ret = tc_newqdisc(ys, req); + if (ret) + fprintf(stderr, "tc_newqdisc failed: %s\n", ys->err.msg); + + tc_newqdisc_req_free(req); + + return ret; +} + +static int tc_clsact_del(struct ynl_sock *ys, int ifi) +{ + struct tc_delqdisc_req *req; + __u16 flags = NLM_F_REQUEST; + int ret; + + req = tc_delqdisc_req_alloc(); + if (!req) { + fprintf(stderr, "tc_delqdisc_req_alloc failed\n"); + return -1; + } + memset(req, 0, sizeof(*req)); + + req->_hdr.tcm_ifindex = ifi; + req->_hdr.tcm_parent = TC_H_CLSACT; + req->_hdr.tcm_handle = TC_HANDLE; + tc_delqdisc_req_set_nlflags(req, flags); + + ret = tc_delqdisc(ys, req); + if (ret) + fprintf(stderr, "tc_delqdisc failed: %s\n", ys->err.msg); + + tc_delqdisc_req_free(req); + + return ret; +} + +static int tc_filter_config(struct ynl_sock *ys, int ifi) +{ + int ret = 0; + + if (tc_filter_add(ys, ifi)) + return -1; + + ret = tc_filter_show(ys, ifi); + + if (tc_filter_del(ys, ifi)) + return -1; + + return ret; +} + +int main(int argc, char **argv) +{ + struct ynl_error yerr; + struct ynl_sock *ys; + int ifi, ret = 0; + + if (argc < 2) { + fprintf(stderr, "Usage: %s <interface_name>\n", argv[0]); + return 1; + } + ifi = if_nametoindex(argv[1]); + if (!ifi) { + perror("if_nametoindex"); + return 1; + } + + ys = ynl_sock_create(&ynl_tc_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + if (tc_clsact_add(ys, ifi)) { + ret = 2; + goto err_destroy; + } + + if (tc_filter_config(ys, ifi)) + ret = 3; + + if (tc_clsact_del(ys, ifi)) + ret = 4; + +err_destroy: + ynl_sock_destroy(ys); + return ret; +} diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile new file mode 100644 index 000000000000..c1df2e001255 --- /dev/null +++ b/tools/net/ynl/tests/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for YNL tests + +TESTS := \ + test_ynl_cli.sh \ + test_ynl_ethtool.sh \ +# end of TESTS + +all: $(TESTS) + +run_tests: + @for test in $(TESTS); do \ + ./$$test; \ + done + +install: $(TESTS) + @mkdir -p $(DESTDIR)/usr/bin + @mkdir -p $(DESTDIR)/usr/share/kselftest + @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/ + @for test in $(TESTS); do \ + name=$$(basename $$test .sh); \ + sed -e 's|^ynl=.*|ynl="ynl"|' \ + -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \ + -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \ + $$test > $(DESTDIR)/usr/bin/$$name; \ + chmod +x $(DESTDIR)/usr/bin/$$name; \ + done + +clean distclean: + @# Nothing to clean + +.PHONY: all install clean run_tests diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config new file mode 100644 index 000000000000..339f1309c03f --- /dev/null +++ b/tools/net/ynl/tests/config @@ -0,0 +1,6 @@ +CONFIG_DUMMY=m +CONFIG_INET_DIAG=y +CONFIG_IPV6=y +CONFIG_NET_NS=y +CONFIG_NETDEVSIM=m +CONFIG_VETH=m diff --git a/tools/net/ynl/tests/test_ynl_cli.sh b/tools/net/ynl/tests/test_ynl_cli.sh new file mode 100755 index 000000000000..7c0722a08117 --- /dev/null +++ b/tools/net/ynl/tests/test_ynl_cli.sh @@ -0,0 +1,327 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test YNL CLI functionality + +# Load KTAP test helpers 
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh" +# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh +source "$KSELFTEST_KTAP_HELPERS" + +# Default ynl path for direct execution, can be overridden by make install +ynl="../pyynl/cli.py" + +readonly NSIM_ID="1338" +readonly NSIM_DEV_NAME="nsim${NSIM_ID}" +readonly VETH_A="veth_a" +readonly VETH_B="veth_b" + +testns="ynl-$(mktemp -u XXXXXX)" +TESTS_NO=0 + +# Test listing available families +cli_list_families() +{ + if $ynl --list-families &>/dev/null; then + ktap_test_pass "YNL CLI list families" + else + ktap_test_fail "YNL CLI list families" + fi +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test netdev family operations (dev-get, queue-get) +cli_netdev_ops() +{ + local dev_output + local ifindex + + ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null) + + dev_output=$(ip netns exec "$testns" $ynl --family netdev \ + --do dev-get --json "{\"ifindex\": $ifindex}" 2>/dev/null) + + if ! echo "$dev_output" | grep -q "ifindex"; then + ktap_test_fail "YNL CLI netdev operations (netdev dev-get output missing ifindex)" + return + fi + + if ! ip netns exec "$testns" $ynl --family netdev \ + --dump queue-get --json "{\"ifindex\": $ifindex}" &>/dev/null; then + ktap_test_fail "YNL CLI netdev operations (failed to get netdev queue info)" + return + fi + + ktap_test_pass "YNL CLI netdev operations" +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test ethtool family operations (rings-get, linkinfo-get) +cli_ethtool_ops() +{ + local rings_output + local linkinfo_output + + rings_output=$(ip netns exec "$testns" $ynl --family ethtool \ + --do rings-get --json "{\"header\": {\"dev-name\": \"$NSIM_DEV_NAME\"}}" 2>/dev/null) + + if ! echo "$rings_output" | grep -q "header"; then + ktap_test_fail "YNL CLI ethtool operations (ethtool rings-get output missing header)" + return + fi + + linkinfo_output=$(ip netns exec "$testns" $ynl --family ethtool \ + --do linkinfo-get --json "{\"header\": {\"dev-name\": \"$VETH_A\"}}" 2>/dev/null) + + if ! echo "$linkinfo_output" | grep -q "header"; then + ktap_test_fail "YNL CLI ethtool operations (ethtool linkinfo-get output missing header)" + return + fi + + ktap_test_pass "YNL CLI ethtool operations" +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test rt-route family operations +cli_rt_route_ops() +{ + local ifindex + + if ! $ynl --list-families 2>/dev/null | grep -q "rt-route"; then + ktap_test_skip "YNL CLI rt-route operations (rt-route family not available)" + return + fi + + ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null) + + # Add route: 192.0.2.0/24 dev $dev scope link + if ! 
ip netns exec "$testns" $ynl --family rt-route --do newroute --create \ + --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null; then + ktap_test_fail "YNL CLI rt-route operations (failed to add route)" + return + fi + + local route_output + route_output=$(ip netns exec "$testns" $ynl --family rt-route --dump getroute 2>/dev/null) + if echo "$route_output" | grep -q "192.0.2.0"; then + ktap_test_pass "YNL CLI rt-route operations" + else + ktap_test_fail "YNL CLI rt-route operations (failed to verify route)" + fi + + ip netns exec "$testns" $ynl --family rt-route --do delroute \ + --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test rt-addr family operations +cli_rt_addr_ops() +{ + local ifindex + + if ! $ynl --list-families 2>/dev/null | grep -q "rt-addr"; then + ktap_test_skip "YNL CLI rt-addr operations (rt-addr family not available)" + return + fi + + ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null) + + if ! ip netns exec "$testns" $ynl --family rt-addr --do newaddr \ + --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null; then + ktap_test_fail "YNL CLI rt-addr operations (failed to add address)" + return + fi + + local addr_output + addr_output=$(ip netns exec "$testns" $ynl --family rt-addr --dump getaddr 2>/dev/null) + if echo "$addr_output" | grep -q "192.0.2.100"; then + ktap_test_pass "YNL CLI rt-addr operations" + else + ktap_test_fail "YNL CLI rt-addr operations (failed to verify address)" + fi + + ip netns exec "$testns" $ynl --family rt-addr --do deladdr \ + --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test rt-link family operations +cli_rt_link_ops() +{ + if ! $ynl --list-families 2>/dev/null | grep -q "rt-link"; then + ktap_test_skip "YNL CLI rt-link operations (rt-link family not available)" + return + fi + + if ! ip netns exec "$testns" $ynl --family rt-link --do newlink --create \ + --json "{\"ifname\": \"dummy0\", \"linkinfo\": {\"kind\": \"dummy\"}}" &>/dev/null; then + ktap_test_fail "YNL CLI rt-link operations (failed to add link)" + return + fi + + local link_output + link_output=$(ip netns exec "$testns" $ynl --family rt-link --dump getlink 2>/dev/null) + if echo "$link_output" | grep -q "$NSIM_DEV_NAME" && echo "$link_output" | grep -q "dummy0"; then + ktap_test_pass "YNL CLI rt-link operations" + else + ktap_test_fail "YNL CLI rt-link operations (failed to verify link)" + fi + + ip netns exec "$testns" $ynl --family rt-link --do dellink \ + --json "{\"ifname\": \"dummy0\"}" &>/dev/null +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test rt-neigh family operations +cli_rt_neigh_ops() +{ + local ifindex + + if ! $ynl --list-families 2>/dev/null | grep -q "rt-neigh"; then + ktap_test_skip "YNL CLI rt-neigh operations (rt-neigh family not available)" + return + fi + + ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null) + + # Add neighbor: 192.0.2.1 dev nsim1338 lladdr 11:22:33:44:55:66 PERMANENT + if ! 
ip netns exec "$testns" $ynl --family rt-neigh --do newneigh --create \ + --json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2, \"ndm-state\": 128}" &>/dev/null; then + ktap_test_fail "YNL CLI rt-neigh operations (failed to add neighbor)" + fi + + local neigh_output + neigh_output=$(ip netns exec "$testns" $ynl --family rt-neigh --dump getneigh 2>/dev/null) + if echo "$neigh_output" | grep -q "192.0.2.1"; then + ktap_test_pass "YNL CLI rt-neigh operations" + else + ktap_test_fail "YNL CLI rt-neigh operations (failed to verify neighbor)" + fi + + ip netns exec "$testns" $ynl --family rt-neigh --do delneigh \ + --json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2}" &>/dev/null +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test rt-rule family operations +cli_rt_rule_ops() +{ + if ! $ynl --list-families 2>/dev/null | grep -q "rt-rule"; then + ktap_test_skip "YNL CLI rt-rule operations (rt-rule family not available)" + return + fi + + # Add rule: from 192.0.2.0/24 lookup 100 none + if ! ip netns exec "$testns" $ynl --family rt-rule --do newrule \ + --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null; then + ktap_test_fail "YNL CLI rt-rule operations (failed to add rule)" + return + fi + + local rule_output + rule_output=$(ip netns exec "$testns" $ynl --family rt-rule --dump getrule 2>/dev/null) + if echo "$rule_output" | grep -q "192.0.2.0"; then + ktap_test_pass "YNL CLI rt-rule operations" + else + ktap_test_fail "YNL CLI rt-rule operations (failed to verify rule)" + fi + + ip netns exec "$testns" $ynl --family rt-rule --do delrule \ + --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null +} +TESTS_NO=$((TESTS_NO + 1)) + +# Test nlctrl family operations +cli_nlctrl_ops() +{ + local family_output + + if ! family_output=$($ynl --family nlctrl \ + --do getfamily --json "{\"family-name\": \"netdev\"}" 2>/dev/null); then + ktap_test_fail "YNL CLI nlctrl getfamily (failed to get nlctrl family info)" + return + fi + + if ! echo "$family_output" | grep -q "family-name"; then + ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-name)" + return + fi + + if ! echo "$family_output" | grep -q "family-id"; then + ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-id)" + return + fi + + ktap_test_pass "YNL CLI nlctrl getfamily" +} +TESTS_NO=$((TESTS_NO + 1)) + +setup() +{ + modprobe netdevsim &> /dev/null + if ! [ -f /sys/bus/netdevsim/new_device ]; then + ktap_skip_all "netdevsim module not available" + exit "$KSFT_SKIP" + fi + + if ! ip netns add "$testns" 2>/dev/null; then + ktap_skip_all "failed to create test namespace" + exit "$KSFT_SKIP" + fi + + echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || { + ktap_skip_all "failed to create netdevsim device" + exit "$KSFT_SKIP" + } + + local dev + dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1) + if [[ -z "$dev" ]]; then + ktap_skip_all "failed to find netdevsim device" + exit "$KSFT_SKIP" + fi + + ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || { + ktap_skip_all "failed to rename netdevsim device" + exit "$KSFT_SKIP" + } + + ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null + + if ! 
ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then + ktap_skip_all "failed to create veth pair" + exit "$KSFT_SKIP" + fi + + ip -n "$testns" link set "$VETH_A" up 2>/dev/null + ip -n "$testns" link set "$VETH_B" up 2>/dev/null +} + +cleanup() +{ + ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true + ip netns del "$testns" 2>/dev/null || true +} + +# Check if ynl command is available +if ! command -v $ynl &>/dev/null && [[ ! -x $ynl ]]; then + ktap_skip_all "ynl command not found: $ynl" + exit "$KSFT_SKIP" +fi + +trap cleanup EXIT + +ktap_print_header +setup +ktap_set_plan "${TESTS_NO}" + +cli_list_families +cli_netdev_ops +cli_ethtool_ops +cli_rt_route_ops +cli_rt_addr_ops +cli_rt_link_ops +cli_rt_neigh_ops +cli_rt_rule_ops +cli_nlctrl_ops + +ktap_finished diff --git a/tools/net/ynl/tests/test_ynl_ethtool.sh b/tools/net/ynl/tests/test_ynl_ethtool.sh new file mode 100755 index 000000000000..b826269017f4 --- /dev/null +++ b/tools/net/ynl/tests/test_ynl_ethtool.sh @@ -0,0 +1,222 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Test YNL ethtool functionality + +# Load KTAP test helpers +KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh" +# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh +source "$KSELFTEST_KTAP_HELPERS" + +# Default ynl-ethtool path for direct execution, can be overridden by make install +ynl_ethtool="../pyynl/ethtool.py" + +readonly NSIM_ID="1337" +readonly NSIM_DEV_NAME="nsim${NSIM_ID}" +readonly VETH_A="veth_a" +readonly VETH_B="veth_b" + +testns="ynl-ethtool-$(mktemp -u XXXXXX)" +TESTS_NO=0 + +# Uses veth device as netdevsim doesn't support basic ethtool device info +ethtool_device_info() +{ + local info_output + + info_output=$(ip netns exec "$testns" $ynl_ethtool "$VETH_A" 2>/dev/null) + + if ! echo "$info_output" | grep -q "Settings for"; then + ktap_test_fail "YNL ethtool device info (device info output missing expected content)" + return + fi + + ktap_test_pass "YNL ethtool device info" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_statistics() +{ + local stats_output + + stats_output=$(ip netns exec "$testns" $ynl_ethtool --statistics "$NSIM_DEV_NAME" 2>/dev/null) + + if ! echo "$stats_output" | grep -q -E "(NIC statistics|packets|bytes)"; then + ktap_test_fail "YNL ethtool statistics (statistics output missing expected content)" + return + fi + + ktap_test_pass "YNL ethtool statistics" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_ring_params() +{ + local ring_output + + ring_output=$(ip netns exec "$testns" $ynl_ethtool --show-ring "$NSIM_DEV_NAME" 2>/dev/null) + + if ! echo "$ring_output" | grep -q -E "(Ring parameters|RX|TX)"; then + ktap_test_fail "YNL ethtool ring parameters (ring parameters output missing expected content)" + return + fi + + if ! ip netns exec "$testns" $ynl_ethtool --set-ring "$NSIM_DEV_NAME" rx 64 2>/dev/null; then + ktap_test_fail "YNL ethtool ring parameters (set-ring command failed unexpectedly)" + return + fi + + ktap_test_pass "YNL ethtool ring parameters (show/set)" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_coalesce_params() +{ + if ! ip netns exec "$testns" $ynl_ethtool --show-coalesce "$NSIM_DEV_NAME" &>/dev/null; then + ktap_test_fail "YNL ethtool coalesce parameters (failed to get coalesce parameters)" + return + fi + + if ! 
ip netns exec "$testns" $ynl_ethtool --set-coalesce "$NSIM_DEV_NAME" rx-usecs 50 2>/dev/null; then + ktap_test_fail "YNL ethtool coalesce parameters (set-coalesce command failed unexpectedly)" + return + fi + + ktap_test_pass "YNL ethtool coalesce parameters (show/set)" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_pause_params() +{ + if ! ip netns exec "$testns" $ynl_ethtool --show-pause "$NSIM_DEV_NAME" &>/dev/null; then + ktap_test_fail "YNL ethtool pause parameters (failed to get pause parameters)" + return + fi + + if ! ip netns exec "$testns" $ynl_ethtool --set-pause "$NSIM_DEV_NAME" tx 1 rx 1 2>/dev/null; then + ktap_test_fail "YNL ethtool pause parameters (set-pause command failed unexpectedly)" + return + fi + + ktap_test_pass "YNL ethtool pause parameters (show/set)" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_features_info() +{ + local features_output + + features_output=$(ip netns exec "$testns" $ynl_ethtool --show-features "$NSIM_DEV_NAME" 2>/dev/null) + + if ! echo "$features_output" | grep -q -E "(Features|offload)"; then + ktap_test_fail "YNL ethtool features info (features output missing expected content)" + return + fi + + ktap_test_pass "YNL ethtool features info (show/set)" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_channels_info() +{ + local channels_output + + channels_output=$(ip netns exec "$testns" $ynl_ethtool --show-channels "$NSIM_DEV_NAME" 2>/dev/null) + + if ! echo "$channels_output" | grep -q -E "(Channel|Combined|RX|TX)"; then + ktap_test_fail "YNL ethtool channels info (channels output missing expected content)" + return + fi + + if ! ip netns exec "$testns" $ynl_ethtool --set-channels "$NSIM_DEV_NAME" combined-count 1 2>/dev/null; then + ktap_test_fail "YNL ethtool channels info (set-channels command failed unexpectedly)" + return + fi + + ktap_test_pass "YNL ethtool channels info (show/set)" +} +TESTS_NO=$((TESTS_NO + 1)) + +ethtool_time_stamping() +{ + local ts_output + + ts_output=$(ip netns exec "$testns" $ynl_ethtool --show-time-stamping "$NSIM_DEV_NAME" 2>/dev/null) + + if ! echo "$ts_output" | grep -q -E "(Time stamping|timestamping|SOF_TIMESTAMPING)"; then + ktap_test_fail "YNL ethtool time stamping (time stamping output missing expected content)" + return + fi + + ktap_test_pass "YNL ethtool time stamping" +} +TESTS_NO=$((TESTS_NO + 1)) + +setup() +{ + modprobe netdevsim &> /dev/null + if ! [ -f /sys/bus/netdevsim/new_device ]; then + ktap_skip_all "netdevsim module not available" + exit "$KSFT_SKIP" + fi + + if ! ip netns add "$testns" 2>/dev/null; then + ktap_skip_all "failed to create test namespace" + exit "$KSFT_SKIP" + fi + + echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || { + ktap_skip_all "failed to create netdevsim device" + exit "$KSFT_SKIP" + } + + local dev + dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1) + if [[ -z "$dev" ]]; then + ktap_skip_all "failed to find netdevsim device" + exit "$KSFT_SKIP" + fi + + ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || { + ktap_skip_all "failed to rename netdevsim device" + exit "$KSFT_SKIP" + } + + ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null + + if ! 
ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then + ktap_skip_all "failed to create veth pair" + exit "$KSFT_SKIP" + fi + + ip -n "$testns" link set "$VETH_A" up 2>/dev/null + ip -n "$testns" link set "$VETH_B" up 2>/dev/null +} + +cleanup() +{ + ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true + ip netns del "$testns" 2>/dev/null || true +} + +# Check if ynl-ethtool command is available +if ! command -v $ynl_ethtool &>/dev/null && [[ ! -x $ynl_ethtool ]]; then + ktap_skip_all "ynl-ethtool command not found: $ynl_ethtool" + exit "$KSFT_SKIP" +fi + +trap cleanup EXIT + +ktap_print_header +setup +ktap_set_plan "${TESTS_NO}" + +ethtool_device_info +ethtool_statistics +ethtool_ring_params +ethtool_coalesce_params +ethtool_pause_params +ethtool_features_info +ethtool_channels_info +ethtool_time_stamping + +ktap_finished diff --git a/tools/net/ynl/ynltool/.gitignore b/tools/net/ynl/ynltool/.gitignore new file mode 100644 index 000000000000..690d399c921a --- /dev/null +++ b/tools/net/ynl/ynltool/.gitignore @@ -0,0 +1,2 @@ +ynltool +*.d diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile new file mode 100644 index 000000000000..f5b1de32daa5 --- /dev/null +++ b/tools/net/ynl/ynltool/Makefile @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0-only + +include ../Makefile.deps + +INSTALL ?= install +prefix ?= /usr + +CC := gcc +CFLAGS := -Wall -Wextra -Werror -O2 +ifeq ("$(DEBUG)","1") + CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif +CFLAGS += -I../lib -I../generated -I../../../include/uapi/ + +SRC_VERSION := \ + $(shell make --no-print-directory -sC ../../../.. kernelversion || \ + echo "unknown") + +CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"' + +SRCS := $(wildcard *.c) +OBJS := $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) + +YNLTOOL := $(OUTPUT)ynltool + +include $(wildcard *.d) + +all: $(YNLTOOL) + +Q = @ + +$(YNLTOOL): ../libynl.a $(OBJS) + $(Q)echo -e "\tLINK $@" + $(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lm + +%.o: %.c ../libynl.a + $(Q)echo -e "\tCC $@" + $(Q)$(COMPILE.c) -MMD -c -o $@ $< + +../libynl.a: + $(Q)$(MAKE) -C ../ + +clean: + rm -f *.o *.d *~ + +distclean: clean + rm -f $(YNLTOOL) + +bindir ?= /usr/bin + +install: $(YNLTOOL) + $(INSTALL) -m 0755 $(YNLTOOL) $(DESTDIR)$(bindir)/$(YNLTOOL) + +.PHONY: all clean distclean +.DEFAULT_GOAL=all diff --git a/tools/net/ynl/ynltool/json_writer.c b/tools/net/ynl/ynltool/json_writer.c new file mode 100644 index 000000000000..c8685e592cd3 --- /dev/null +++ b/tools/net/ynl/ynltool/json_writer.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause) +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * Authors: Stephen Hemminger <stephen@networkplumber.org> + */ + +#include <stdio.h> +#include <stdbool.h> +#include <stdarg.h> +#include <assert.h> +#include <malloc.h> +#include <inttypes.h> +#include <stdint.h> + +#include "json_writer.h" + +struct json_writer { + FILE *out; + unsigned depth; + bool pretty; + char sep; +}; + +static void jsonw_indent(json_writer_t *self) +{ + unsigned i; + for (i = 0; i < self->depth; ++i) + fputs(" ", self->out); +} + +static void jsonw_eol(json_writer_t *self) +{ + if (!self->pretty) + return; + + putc('\n', self->out); + jsonw_indent(self); +} + +static void jsonw_eor(json_writer_t *self) +{ + if (self->sep != '\0') + putc(self->sep, self->out); + self->sep = ','; +} + 
+static void jsonw_puts(json_writer_t *self, const char *str) +{ + putc('"', self->out); + for (; *str; ++str) + switch (*str) { + case '\t': + fputs("\\t", self->out); + break; + case '\n': + fputs("\\n", self->out); + break; + case '\r': + fputs("\\r", self->out); + break; + case '\f': + fputs("\\f", self->out); + break; + case '\b': + fputs("\\b", self->out); + break; + case '\\': + fputs("\\\\", self->out); + break; + case '"': + fputs("\\\"", self->out); + break; + default: + putc(*str, self->out); + } + putc('"', self->out); +} + +json_writer_t *jsonw_new(FILE *f) +{ + json_writer_t *self = malloc(sizeof(*self)); + if (self) { + self->out = f; + self->depth = 0; + self->pretty = false; + self->sep = '\0'; + } + return self; +} + +void jsonw_destroy(json_writer_t **self_p) +{ + json_writer_t *self = *self_p; + + assert(self->depth == 0); + fputs("\n", self->out); + fflush(self->out); + free(self); + *self_p = NULL; +} + +void jsonw_pretty(json_writer_t *self, bool on) +{ + self->pretty = on; +} + +void jsonw_reset(json_writer_t *self) +{ + assert(self->depth == 0); + self->sep = '\0'; +} + +static void jsonw_begin(json_writer_t *self, int c) +{ + jsonw_eor(self); + putc(c, self->out); + ++self->depth; + self->sep = '\0'; +} + +static void jsonw_end(json_writer_t *self, int c) +{ + assert(self->depth > 0); + + --self->depth; + if (self->sep != '\0') + jsonw_eol(self); + putc(c, self->out); + self->sep = ','; +} + +void jsonw_name(json_writer_t *self, const char *name) +{ + jsonw_eor(self); + jsonw_eol(self); + self->sep = '\0'; + jsonw_puts(self, name); + putc(':', self->out); + if (self->pretty) + putc(' ', self->out); +} + +void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +{ + jsonw_eor(self); + putc('"', self->out); + vfprintf(self->out, fmt, ap); + putc('"', self->out); +} + +void jsonw_printf(json_writer_t *self, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + jsonw_eor(self); + vfprintf(self->out, fmt, ap); + va_end(ap); +} + +void jsonw_start_object(json_writer_t *self) +{ + jsonw_begin(self, '{'); +} + +void jsonw_end_object(json_writer_t *self) +{ + jsonw_end(self, '}'); +} + +void jsonw_start_array(json_writer_t *self) +{ + jsonw_begin(self, '['); +} + +void jsonw_end_array(json_writer_t *self) +{ + jsonw_end(self, ']'); +} + +void jsonw_string(json_writer_t *self, const char *value) +{ + jsonw_eor(self); + jsonw_puts(self, value); +} + +void jsonw_bool(json_writer_t *self, bool val) +{ + jsonw_printf(self, "%s", val ? 
"true" : "false"); +} + +void jsonw_null(json_writer_t *self) +{ + jsonw_printf(self, "null"); +} + +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num) +{ + jsonw_printf(self, fmt, num); +} + +void jsonw_float(json_writer_t *self, double num) +{ + jsonw_printf(self, "%g", num); +} + +void jsonw_hu(json_writer_t *self, unsigned short num) +{ + jsonw_printf(self, "%hu", num); +} + +void jsonw_uint(json_writer_t *self, uint64_t num) +{ + jsonw_printf(self, "%"PRIu64, num); +} + +void jsonw_lluint(json_writer_t *self, unsigned long long int num) +{ + jsonw_printf(self, "%llu", num); +} + +void jsonw_int(json_writer_t *self, int64_t num) +{ + jsonw_printf(self, "%"PRId64, num); +} + +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val) +{ + jsonw_name(self, prop); + jsonw_string(self, val); +} + +void jsonw_bool_field(json_writer_t *self, const char *prop, bool val) +{ + jsonw_name(self, prop); + jsonw_bool(self, val); +} + +void jsonw_float_field(json_writer_t *self, const char *prop, double val) +{ + jsonw_name(self, prop); + jsonw_float(self, val); +} + +void jsonw_float_field_fmt(json_writer_t *self, + const char *prop, + const char *fmt, + double val) +{ + jsonw_name(self, prop); + jsonw_float_fmt(self, fmt, val); +} + +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) +{ + jsonw_name(self, prop); + jsonw_uint(self, num); +} + +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) +{ + jsonw_name(self, prop); + jsonw_hu(self, num); +} + +void jsonw_lluint_field(json_writer_t *self, + const char *prop, + unsigned long long int num) +{ + jsonw_name(self, prop); + jsonw_lluint(self, num); +} + +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num) +{ + jsonw_name(self, prop); + jsonw_int(self, num); +} + +void jsonw_null_field(json_writer_t *self, const char *prop) +{ + jsonw_name(self, prop); + jsonw_null(self); +} diff --git a/tools/net/ynl/ynltool/json_writer.h b/tools/net/ynl/ynltool/json_writer.h new file mode 100644 index 000000000000..0f1e63c88f6a --- /dev/null +++ b/tools/net/ynl/ynltool/json_writer.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * Authors: Stephen Hemminger <stephen@networkplumber.org> + */ + +#ifndef _JSON_WRITER_H_ +#define _JSON_WRITER_H_ + +#include <stdbool.h> +#include <stdint.h> +#include <stdarg.h> +#include <stdio.h> + +/* Opaque class structure */ +typedef struct json_writer json_writer_t; + +/* Create a new JSON stream */ +json_writer_t *jsonw_new(FILE *f); +/* End output to JSON stream */ +void jsonw_destroy(json_writer_t **self_p); + +/* Cause output to have pretty whitespace */ +void jsonw_pretty(json_writer_t *self, bool on); + +/* Reset separator to create new JSON */ +void jsonw_reset(json_writer_t *self); + +/* Add property name */ +void jsonw_name(json_writer_t *self, const char *name); + +/* Add value */ +void __attribute__((format(printf, 2, 0))) jsonw_vprintf_enquote(json_writer_t *self, + const char *fmt, + va_list ap); +void __attribute__((format(printf, 2, 3))) jsonw_printf(json_writer_t *self, + const char *fmt, ...); +void jsonw_string(json_writer_t *self, const char *value); +void jsonw_bool(json_writer_t *self, bool value); +void jsonw_float(json_writer_t *self, double number); +void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num); +void 
jsonw_uint(json_writer_t *self, uint64_t number); +void jsonw_hu(json_writer_t *self, unsigned short number); +void jsonw_int(json_writer_t *self, int64_t number); +void jsonw_null(json_writer_t *self); +void jsonw_lluint(json_writer_t *self, unsigned long long int num); + +/* Useful Combinations of name and value */ +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); +void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); +void jsonw_float_field(json_writer_t *self, const char *prop, double num); +void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num); +void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); +void jsonw_null_field(json_writer_t *self, const char *prop); +void jsonw_lluint_field(json_writer_t *self, const char *prop, + unsigned long long int num); +void jsonw_float_field_fmt(json_writer_t *self, const char *prop, + const char *fmt, double val); + +/* Collections */ +void jsonw_start_object(json_writer_t *self); +void jsonw_end_object(json_writer_t *self); + +void jsonw_start_array(json_writer_t *self); +void jsonw_end_array(json_writer_t *self); + +/* Override default exception handling */ +typedef void (jsonw_err_handler_fn)(const char *); + +#endif /* _JSON_WRITER_H_ */ diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c new file mode 100644 index 000000000000..5d0f428eed0a --- /dev/null +++ b/tools/net/ynl/ynltool/main.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +/* Copyright Meta Platforms, Inc. and affiliates */ + +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "main.h" + +const char *bin_name; +static int last_argc; +static char **last_argv; +static int (*last_do_help)(int argc, char **argv); +json_writer_t *json_wtr; +bool pretty_output; +bool json_output; + +static void __attribute__((noreturn)) clean_and_exit(int i) +{ + if (json_output) + jsonw_destroy(&json_wtr); + + exit(i); +} + +void usage(void) +{ + last_do_help(last_argc - 1, last_argv + 1); + + clean_and_exit(-1); +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n" + " %s version\n" + "\n" + " OBJECT := { page-pool | qstats }\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, bin_name); + + return 0; +} + +static int do_version(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "version"); + jsonw_printf(json_wtr, SRC_VERSION); + jsonw_end_object(json_wtr); + } else { + printf("%s " SRC_VERSION "\n", bin_name); + } + return 0; +} + +static const struct cmd commands[] = { + { "help", do_help }, + { "page-pool", do_page_pool }, + { "qstats", do_qstats }, + { "version", do_version }, + { 0 } +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)) +{ + unsigned int i; + + last_argc = argc; + last_argv = argv; + last_do_help = help; + + if (argc < 1 && cmds[0].func) + return cmds[0].func(argc, argv); + + for (i = 0; cmds[i].cmd; i++) { + if (is_prefix(*argv, cmds[i].cmd)) { + if (!cmds[i].func) { + 
p_err("command '%s' is not available", cmds[i].cmd); + return -1; + } + return cmds[i].func(argc - 1, argv + 1); + } + } + + help(argc - 1, argv + 1); + + return -1; +} + +bool is_prefix(const char *pfx, const char *str) +{ + if (!pfx) + return false; + if (strlen(str) < strlen(pfx)) + return false; + + return !memcmp(str, pfx, strlen(pfx)); +} + +/* Last argument MUST be NULL pointer */ +int detect_common_prefix(const char *arg, ...) +{ + unsigned int count = 0; + const char *ref; + char msg[256]; + va_list ap; + + snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg); + va_start(ap, arg); + while ((ref = va_arg(ap, const char *))) { + if (!is_prefix(arg, ref)) + continue; + count++; + if (count > 1) + strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1); + strncat(msg, ref, sizeof(msg) - strlen(msg) - 1); + } + va_end(ap); + strncat(msg, "'", sizeof(msg) - strlen(msg) - 1); + + if (count >= 2) { + p_err("%s", msg); + return -1; + } + + return 0; +} + +void p_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "error"); + jsonw_vprintf_enquote(json_wtr, fmt, ap); + jsonw_end_object(json_wtr); + } else { + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + } + va_end(ap); +} + +void p_info(const char *fmt, ...) +{ + va_list ap; + + if (json_output) + return; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + +int main(int argc, char **argv) +{ + static const struct option options[] = { + { "json", no_argument, NULL, 'j' }, + { "help", no_argument, NULL, 'h' }, + { "pretty", no_argument, NULL, 'p' }, + { "version", no_argument, NULL, 'V' }, + { 0 } + }; + bool version_requested = false; + int opt, ret; + + setlinebuf(stdout); + + last_do_help = do_help; + pretty_output = false; + json_output = false; + bin_name = "ynltool"; + + opterr = 0; + while ((opt = getopt_long(argc, argv, "Vhjp", + options, NULL)) >= 0) { + switch (opt) { + case 'V': + version_requested = true; + break; + case 'h': + return do_help(argc, argv); + case 'p': + pretty_output = true; + /* fall through */ + case 'j': + if (!json_output) { + json_wtr = jsonw_new(stdout); + if (!json_wtr) { + p_err("failed to create JSON writer"); + return -1; + } + json_output = true; + } + jsonw_pretty(json_wtr, pretty_output); + break; + default: + p_err("unrecognized option '%s'", argv[optind - 1]); + if (json_output) + clean_and_exit(-1); + else + usage(); + } + } + + argc -= optind; + argv += optind; + if (argc < 0) + usage(); + + if (version_requested) + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); + + if (json_output) + jsonw_destroy(&json_wtr); + + return ret; +} diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h new file mode 100644 index 000000000000..c7039f9ac55a --- /dev/null +++ b/tools/net/ynl/ynltool/main.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +/* Copyright Meta Platforms, Inc. 
and affiliates */ + +#ifndef __YNLTOOL_H +#define __YNLTOOL_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#include "json_writer.h" + +#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); }) +#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); }) +#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; }) +#define GET_ARG() ({ argc--; *argv++; }) +#define REQ_ARGS(cnt) \ + ({ \ + int _cnt = (cnt); \ + bool _res; \ + \ + if (argc < _cnt) { \ + p_err("'%s' needs at least %d arguments, %d found", \ + argv[-1], _cnt, argc); \ + _res = false; \ + } else { \ + _res = true; \ + } \ + _res; \ + }) + +#define HELP_SPEC_OPTIONS \ + "OPTIONS := { {-j|--json} [{-p|--pretty}] }" + +extern const char *bin_name; + +extern json_writer_t *json_wtr; +extern bool json_output; +extern bool pretty_output; + +void __attribute__((format(printf, 1, 2))) p_err(const char *fmt, ...); +void __attribute__((format(printf, 1, 2))) p_info(const char *fmt, ...); + +bool is_prefix(const char *pfx, const char *str); +int detect_common_prefix(const char *arg, ...); +void usage(void) __attribute__((noreturn)); + +struct cmd { + const char *cmd; + int (*func)(int argc, char **argv); +}; + +int cmd_select(const struct cmd *cmds, int argc, char **argv, + int (*help)(int argc, char **argv)); + +/* subcommands */ +int do_page_pool(int argc, char **argv); +int do_qstats(int argc, char **argv); + +#endif /* __YNLTOOL_H */ diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c new file mode 100644 index 000000000000..4b24492abab7 --- /dev/null +++ b/tools/net/ynl/ynltool/page-pool.c @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <net/if.h> + +#include <ynl.h> +#include "netdev-user.h" + +#include "main.h" + +struct pp_stat { + unsigned int ifc; + + struct { + unsigned int cnt; + size_t refs, bytes; + } live[2]; + + size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; +}; + +struct pp_stats_array { + unsigned int i, max; + struct pp_stat *s; +}; + +static struct pp_stat *find_ifc(struct pp_stats_array *a, unsigned int ifindex) +{ + unsigned int i; + + for (i = 0; i < a->i; i++) { + if (a->s[i].ifc == ifindex) + return &a->s[i]; + } + + a->i++; + if (a->i == a->max) { + a->max *= 2; + a->s = reallocarray(a->s, a->max, sizeof(*a->s)); + } + a->s[i].ifc = ifindex; + return &a->s[i]; +} + +static void count_pool(struct pp_stat *s, unsigned int l, + struct netdev_page_pool_get_rsp *pp) +{ + s->live[l].cnt++; + if (pp->_present.inflight) + s->live[l].refs += pp->inflight; + if (pp->_present.inflight_mem) + s->live[l].bytes += pp->inflight_mem; +} + +/* We don't know how many pages are sitting in cache and ring + * so we will under-count the recycling rate a bit. 
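 * As a worked illustration with made-up counters: alloc_fast = 900,
 * alloc_slow = 100, recycle_ring = 700 and recycle_cache = 200 give
 * recycling = (700 + 200) / (900 + 100) * 100 = 90%; pages still parked
 * in the ring or cache at dump time are simply not yet counted as recycled.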
+ */ +static void print_json_recycling_stats(struct pp_stat *s) +{ + double recycle; + + if (s->alloc_fast + s->alloc_slow) { + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + jsonw_float_field(json_wtr, "recycling_pct", recycle); + } + + jsonw_name(json_wtr, "alloc"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "slow", s->alloc_slow); + jsonw_uint_field(json_wtr, "fast", s->alloc_fast); + jsonw_end_object(json_wtr); + + jsonw_name(json_wtr, "recycle"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ring", s->recycle_ring); + jsonw_uint_field(json_wtr, "cache", s->recycle_cache); + jsonw_end_object(json_wtr); +} + +static void print_plain_recycling_stats(struct pp_stat *s) +{ + double recycle; + + if (s->alloc_fast + s->alloc_slow) { + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + printf("recycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)", + recycle, s->alloc_slow, s->alloc_fast, + s->recycle_ring, s->recycle_cache); + } +} + +static void print_json_stats(struct pp_stats_array *a) +{ + jsonw_start_array(json_wtr); + + for (unsigned int i = 0; i < a->i; i++) { + char ifname[IF_NAMESIZE]; + struct pp_stat *s = &a->s[i]; + const char *name; + + jsonw_start_object(json_wtr); + + if (!s->ifc) { + jsonw_string_field(json_wtr, "ifname", "<orphan>"); + jsonw_uint_field(json_wtr, "ifindex", 0); + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", s->ifc); + } + + jsonw_uint_field(json_wtr, "page_pools", s->live[1].cnt); + jsonw_uint_field(json_wtr, "zombies", s->live[0].cnt); + + jsonw_name(json_wtr, "live"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "refs", s->live[1].refs); + jsonw_uint_field(json_wtr, "bytes", s->live[1].bytes); + jsonw_end_object(json_wtr); + + jsonw_name(json_wtr, "zombie"); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "refs", s->live[0].refs); + jsonw_uint_field(json_wtr, "bytes", s->live[0].bytes); + jsonw_end_object(json_wtr); + + if (s->alloc_fast || s->alloc_slow) + print_json_recycling_stats(s); + + jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void print_plain_stats(struct pp_stats_array *a) +{ + for (unsigned int i = 0; i < a->i; i++) { + char ifname[IF_NAMESIZE]; + struct pp_stat *s = &a->s[i]; + const char *name; + + if (!s->ifc) { + printf("<orphan>\t"); + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + printf("%8s", name); + printf("[%u]\t", s->ifc); + } + + printf("page pools: %u (zombies: %u)\n", + s->live[1].cnt, s->live[0].cnt); + printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", + s->live[1].refs, s->live[1].bytes, + s->live[0].refs, s->live[0].bytes); + + if (s->alloc_fast || s->alloc_slow) { + printf("\t\t"); + print_plain_recycling_stats(s); + printf("\n"); + } + } +} + +static bool +find_pool_stat_in_list(struct netdev_page_pool_stats_get_list *pp_stats, + __u64 pool_id, struct pp_stat *pstat) +{ + ynl_dump_foreach(pp_stats, pp) { + if (!pp->_present.info || !pp->info._present.id) + continue; + if (pp->info.id != pool_id) + continue; + + memset(pstat, 0, sizeof(*pstat)); + if (pp->_present.alloc_fast) + pstat->alloc_fast = pp->alloc_fast; + if (pp->_present.alloc_refill) + pstat->alloc_fast += pp->alloc_refill; + if (pp->_present.alloc_slow) + pstat->alloc_slow = pp->alloc_slow; + if (pp->_present.recycle_ring) + 
pstat->recycle_ring = pp->recycle_ring; + if (pp->_present.recycle_cached) + pstat->recycle_cache = pp->recycle_cached; + return true; + } + return false; +} + +static void +print_json_pool_list(struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats, + bool zombies_only) +{ + jsonw_start_array(json_wtr); + + ynl_dump_foreach(pools, pp) { + char ifname[IF_NAMESIZE]; + struct pp_stat pstat; + const char *name; + + if (zombies_only && !pp->_present.detach_time) + continue; + + jsonw_start_object(json_wtr); + + jsonw_uint_field(json_wtr, "id", pp->id); + + if (pp->_present.ifindex) { + name = if_indextoname(pp->ifindex, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", pp->ifindex); + } + + if (pp->_present.napi_id) + jsonw_uint_field(json_wtr, "napi_id", pp->napi_id); + + if (pp->_present.inflight) + jsonw_uint_field(json_wtr, "refs", pp->inflight); + + if (pp->_present.inflight_mem) + jsonw_uint_field(json_wtr, "bytes", pp->inflight_mem); + + if (pp->_present.detach_time) + jsonw_uint_field(json_wtr, "detach_time", pp->detach_time); + + if (pp->_present.dmabuf) + jsonw_uint_field(json_wtr, "dmabuf", pp->dmabuf); + + if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) && + (pstat.alloc_fast || pstat.alloc_slow)) + print_json_recycling_stats(&pstat); + + jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void +print_plain_pool_list(struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats, + bool zombies_only) +{ + ynl_dump_foreach(pools, pp) { + char ifname[IF_NAMESIZE]; + struct pp_stat pstat; + const char *name; + + if (zombies_only && !pp->_present.detach_time) + continue; + + printf("pool id: %llu", pp->id); + + if (pp->_present.ifindex) { + name = if_indextoname(pp->ifindex, ifname); + if (name) + printf(" dev: %s", name); + printf("[%u]", pp->ifindex); + } + + if (pp->_present.napi_id) + printf(" napi: %llu", pp->napi_id); + + printf("\n"); + + if (pp->_present.inflight || pp->_present.inflight_mem) { + printf(" inflight:"); + if (pp->_present.inflight) + printf(" %llu pages", pp->inflight); + if (pp->_present.inflight_mem) + printf(" %llu bytes", pp->inflight_mem); + printf("\n"); + } + + if (pp->_present.detach_time) + printf(" detached: %llu\n", pp->detach_time); + + if (pp->_present.dmabuf) + printf(" dmabuf: %u\n", pp->dmabuf); + + if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) && + (pstat.alloc_fast || pstat.alloc_slow)) { + printf(" "); + print_plain_recycling_stats(&pstat); + printf("\n"); + } + } +} + +static void aggregate_device_stats(struct pp_stats_array *a, + struct netdev_page_pool_get_list *pools, + struct netdev_page_pool_stats_get_list *pp_stats) +{ + ynl_dump_foreach(pools, pp) { + struct pp_stat *s = find_ifc(a, pp->ifindex); + + count_pool(s, 1, pp); + if (pp->_present.detach_time) + count_pool(s, 0, pp); + } + + ynl_dump_foreach(pp_stats, pp) { + struct pp_stat *s = find_ifc(a, pp->info.ifindex); + + if (pp->_present.alloc_fast) + s->alloc_fast += pp->alloc_fast; + if (pp->_present.alloc_refill) + s->alloc_fast += pp->alloc_refill; + if (pp->_present.alloc_slow) + s->alloc_slow += pp->alloc_slow; + if (pp->_present.recycle_ring) + s->recycle_ring += pp->recycle_ring; + if (pp->_present.recycle_cached) + s->recycle_cache += pp->recycle_cached; + } +} + +static int do_stats(int argc, char **argv) +{ + struct netdev_page_pool_stats_get_list *pp_stats; + struct netdev_page_pool_get_list *pools; + enum { + 
GROUP_BY_DEVICE, + GROUP_BY_POOL, + } group_by = GROUP_BY_DEVICE; + bool zombies_only = false; + struct pp_stats_array a = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret = 0; + + /* Parse options */ + while (argc > 0) { + if (is_prefix(*argv, "group-by")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + return -1; + + if (is_prefix(*argv, "device")) { + group_by = GROUP_BY_DEVICE; + } else if (is_prefix(*argv, "pp") || + is_prefix(*argv, "page-pool") || + is_prefix(*argv, "none")) { + group_by = GROUP_BY_POOL; + } else { + p_err("invalid group-by value '%s'", *argv); + return -1; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "zombies")) { + zombies_only = true; + group_by = GROUP_BY_POOL; + NEXT_ARG(); + } else { + p_err("unknown option '%s'", *argv); + return -1; + } + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + pools = netdev_page_pool_get_dump(ys); + if (!pools) { + p_err("failed to get page pools: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + pp_stats = netdev_page_pool_stats_get_dump(ys); + if (!pp_stats) { + p_err("failed to get page pool stats: %s", ys->err.msg); + ret = -1; + goto exit_free_pp_list; + } + + /* If grouping by pool, print individual pools */ + if (group_by == GROUP_BY_POOL) { + if (json_output) + print_json_pool_list(pools, pp_stats, zombies_only); + else + print_plain_pool_list(pools, pp_stats, zombies_only); + } else { + /* Aggregated stats mode (group-by device) */ + a.max = 64; + a.s = calloc(a.max, sizeof(*a.s)); + if (!a.s) { + p_err("failed to allocate stats array"); + ret = -1; + goto exit_free_stats_list; + } + + aggregate_device_stats(&a, pools, pp_stats); + + if (json_output) + print_json_stats(&a); + else + print_plain_stats(&a); + + free(a.s); + } + +exit_free_stats_list: + netdev_page_pool_stats_get_list_free(pp_stats); +exit_free_pp_list: + netdev_page_pool_get_list_free(pools); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s page-pool { COMMAND | help }\n" + " %s page-pool stats [ OPTIONS ]\n" + "\n" + " OPTIONS := { group-by { device | page-pool | none } | zombies }\n" + "\n" + " stats - Display page pool statistics\n" + " stats group-by device - Group statistics by network device (default)\n" + " stats group-by page-pool | pp | none\n" + " - Show individual page pool details (no grouping)\n" + " stats zombies - Show only zombie page pools (detached but with\n" + " pages in flight). 
Implies group-by page-pool.\n" + "", + bin_name, bin_name); + + return 0; +} + +static const struct cmd page_pool_cmds[] = { + { "help", do_help }, + { "stats", do_stats }, + { 0 } +}; + +int do_page_pool(int argc, char **argv) +{ + return cmd_select(page_pool_cmds, argc, argv, do_help); +} diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c new file mode 100644 index 000000000000..31fb45709ffa --- /dev/null +++ b/tools/net/ynl/ynltool/qstats.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <net/if.h> +#include <math.h> + +#include <ynl.h> +#include "netdev-user.h" + +#include "main.h" + +static enum netdev_qstats_scope scope; /* default - device */ + +struct queue_balance { + unsigned int ifindex; + enum netdev_queue_type type; + unsigned int queue_count; + __u64 *rx_packets; + __u64 *rx_bytes; + __u64 *tx_packets; + __u64 *tx_bytes; +}; + +static void print_json_qstats(struct netdev_qstats_get_list *qstats) +{ + jsonw_start_array(json_wtr); + + ynl_dump_foreach(qstats, qs) { + char ifname[IF_NAMESIZE]; + const char *name; + + jsonw_start_object(json_wtr); + + name = if_indextoname(qs->ifindex, ifname); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", qs->ifindex); + + if (qs->_present.queue_type) + jsonw_string_field(json_wtr, "queue-type", + netdev_queue_type_str(qs->queue_type)); + if (qs->_present.queue_id) + jsonw_uint_field(json_wtr, "queue-id", qs->queue_id); + + if (qs->_present.rx_packets || qs->_present.rx_bytes || + qs->_present.rx_alloc_fail || qs->_present.rx_hw_drops || + qs->_present.rx_csum_complete || qs->_present.rx_hw_gro_packets) { + jsonw_name(json_wtr, "rx"); + jsonw_start_object(json_wtr); + if (qs->_present.rx_packets) + jsonw_uint_field(json_wtr, "packets", qs->rx_packets); + if (qs->_present.rx_bytes) + jsonw_uint_field(json_wtr, "bytes", qs->rx_bytes); + if (qs->_present.rx_alloc_fail) + jsonw_uint_field(json_wtr, "alloc-fail", qs->rx_alloc_fail); + if (qs->_present.rx_hw_drops) + jsonw_uint_field(json_wtr, "hw-drops", qs->rx_hw_drops); + if (qs->_present.rx_hw_drop_overruns) + jsonw_uint_field(json_wtr, "hw-drop-overruns", qs->rx_hw_drop_overruns); + if (qs->_present.rx_hw_drop_ratelimits) + jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->rx_hw_drop_ratelimits); + if (qs->_present.rx_csum_complete) + jsonw_uint_field(json_wtr, "csum-complete", qs->rx_csum_complete); + if (qs->_present.rx_csum_unnecessary) + jsonw_uint_field(json_wtr, "csum-unnecessary", qs->rx_csum_unnecessary); + if (qs->_present.rx_csum_none) + jsonw_uint_field(json_wtr, "csum-none", qs->rx_csum_none); + if (qs->_present.rx_csum_bad) + jsonw_uint_field(json_wtr, "csum-bad", qs->rx_csum_bad); + if (qs->_present.rx_hw_gro_packets) + jsonw_uint_field(json_wtr, "hw-gro-packets", qs->rx_hw_gro_packets); + if (qs->_present.rx_hw_gro_bytes) + jsonw_uint_field(json_wtr, "hw-gro-bytes", qs->rx_hw_gro_bytes); + if (qs->_present.rx_hw_gro_wire_packets) + jsonw_uint_field(json_wtr, "hw-gro-wire-packets", qs->rx_hw_gro_wire_packets); + if (qs->_present.rx_hw_gro_wire_bytes) + jsonw_uint_field(json_wtr, "hw-gro-wire-bytes", qs->rx_hw_gro_wire_bytes); + jsonw_end_object(json_wtr); + } + + if (qs->_present.tx_packets || qs->_present.tx_bytes || + qs->_present.tx_hw_drops || qs->_present.tx_csum_none || + qs->_present.tx_hw_gso_packets) { + jsonw_name(json_wtr, "tx"); + jsonw_start_object(json_wtr); + if 
(qs->_present.tx_packets) + jsonw_uint_field(json_wtr, "packets", qs->tx_packets); + if (qs->_present.tx_bytes) + jsonw_uint_field(json_wtr, "bytes", qs->tx_bytes); + if (qs->_present.tx_hw_drops) + jsonw_uint_field(json_wtr, "hw-drops", qs->tx_hw_drops); + if (qs->_present.tx_hw_drop_errors) + jsonw_uint_field(json_wtr, "hw-drop-errors", qs->tx_hw_drop_errors); + if (qs->_present.tx_hw_drop_ratelimits) + jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->tx_hw_drop_ratelimits); + if (qs->_present.tx_csum_none) + jsonw_uint_field(json_wtr, "csum-none", qs->tx_csum_none); + if (qs->_present.tx_needs_csum) + jsonw_uint_field(json_wtr, "needs-csum", qs->tx_needs_csum); + if (qs->_present.tx_hw_gso_packets) + jsonw_uint_field(json_wtr, "hw-gso-packets", qs->tx_hw_gso_packets); + if (qs->_present.tx_hw_gso_bytes) + jsonw_uint_field(json_wtr, "hw-gso-bytes", qs->tx_hw_gso_bytes); + if (qs->_present.tx_hw_gso_wire_packets) + jsonw_uint_field(json_wtr, "hw-gso-wire-packets", qs->tx_hw_gso_wire_packets); + if (qs->_present.tx_hw_gso_wire_bytes) + jsonw_uint_field(json_wtr, "hw-gso-wire-bytes", qs->tx_hw_gso_wire_bytes); + if (qs->_present.tx_stop) + jsonw_uint_field(json_wtr, "stop", qs->tx_stop); + if (qs->_present.tx_wake) + jsonw_uint_field(json_wtr, "wake", qs->tx_wake); + jsonw_end_object(json_wtr); + } + + jsonw_end_object(json_wtr); + } + + jsonw_end_array(json_wtr); +} + +static void print_one(bool present, const char *name, unsigned long long val, + int *line) +{ + if (!present) + return; + + if (!*line) { + printf(" "); + ++(*line); + } + + /* Don't waste space on tx- and rx- prefix, its implied by queue type */ + if (scope == NETDEV_QSTATS_SCOPE_QUEUE && + (name[0] == 'r' || name[0] == 't') && + name[1] == 'x' && name[2] == '-') + name += 3; + + printf(" %15s: %15llu", name, val); + + if (++(*line) == 3) { + printf("\n"); + *line = 0; + } +} + +static void print_plain_qstats(struct netdev_qstats_get_list *qstats) +{ + ynl_dump_foreach(qstats, qs) { + char ifname[IF_NAMESIZE]; + const char *name; + int n; + + name = if_indextoname(qs->ifindex, ifname); + if (name) + printf("%s", name); + else + printf("ifindex:%u", qs->ifindex); + + if (qs->_present.queue_type && qs->_present.queue_id) + printf("\t%s-%-3u", + netdev_queue_type_str(qs->queue_type), + qs->queue_id); + else + printf("\t "); + + n = 1; + + /* Basic counters */ + print_one(qs->_present.rx_packets, "rx-packets", qs->rx_packets, &n); + print_one(qs->_present.rx_bytes, "rx-bytes", qs->rx_bytes, &n); + print_one(qs->_present.tx_packets, "tx-packets", qs->tx_packets, &n); + print_one(qs->_present.tx_bytes, "tx-bytes", qs->tx_bytes, &n); + + /* RX error/drop counters */ + print_one(qs->_present.rx_alloc_fail, "rx-alloc-fail", + qs->rx_alloc_fail, &n); + print_one(qs->_present.rx_hw_drops, "rx-hw-drops", + qs->rx_hw_drops, &n); + print_one(qs->_present.rx_hw_drop_overruns, "rx-hw-drop-overruns", + qs->rx_hw_drop_overruns, &n); + print_one(qs->_present.rx_hw_drop_ratelimits, "rx-hw-drop-ratelimits", + qs->rx_hw_drop_ratelimits, &n); + + /* RX checksum counters */ + print_one(qs->_present.rx_csum_complete, "rx-csum-complete", + qs->rx_csum_complete, &n); + print_one(qs->_present.rx_csum_unnecessary, "rx-csum-unnecessary", + qs->rx_csum_unnecessary, &n); + print_one(qs->_present.rx_csum_none, "rx-csum-none", + qs->rx_csum_none, &n); + print_one(qs->_present.rx_csum_bad, "rx-csum-bad", + qs->rx_csum_bad, &n); + + /* RX GRO counters */ + print_one(qs->_present.rx_hw_gro_packets, "rx-hw-gro-packets", + qs->rx_hw_gro_packets, &n); + 
print_one(qs->_present.rx_hw_gro_bytes, "rx-hw-gro-bytes", + qs->rx_hw_gro_bytes, &n); + print_one(qs->_present.rx_hw_gro_wire_packets, "rx-hw-gro-wire-packets", + qs->rx_hw_gro_wire_packets, &n); + print_one(qs->_present.rx_hw_gro_wire_bytes, "rx-hw-gro-wire-bytes", + qs->rx_hw_gro_wire_bytes, &n); + + /* TX error/drop counters */ + print_one(qs->_present.tx_hw_drops, "tx-hw-drops", + qs->tx_hw_drops, &n); + print_one(qs->_present.tx_hw_drop_errors, "tx-hw-drop-errors", + qs->tx_hw_drop_errors, &n); + print_one(qs->_present.tx_hw_drop_ratelimits, "tx-hw-drop-ratelimits", + qs->tx_hw_drop_ratelimits, &n); + + /* TX checksum counters */ + print_one(qs->_present.tx_csum_none, "tx-csum-none", + qs->tx_csum_none, &n); + print_one(qs->_present.tx_needs_csum, "tx-needs-csum", + qs->tx_needs_csum, &n); + + /* TX GSO counters */ + print_one(qs->_present.tx_hw_gso_packets, "tx-hw-gso-packets", + qs->tx_hw_gso_packets, &n); + print_one(qs->_present.tx_hw_gso_bytes, "tx-hw-gso-bytes", + qs->tx_hw_gso_bytes, &n); + print_one(qs->_present.tx_hw_gso_wire_packets, "tx-hw-gso-wire-packets", + qs->tx_hw_gso_wire_packets, &n); + print_one(qs->_present.tx_hw_gso_wire_bytes, "tx-hw-gso-wire-bytes", + qs->tx_hw_gso_wire_bytes, &n); + + /* TX queue control */ + print_one(qs->_present.tx_stop, "tx-stop", qs->tx_stop, &n); + print_one(qs->_present.tx_wake, "tx-wake", qs->tx_wake, &n); + + if (n) + printf("\n"); + } +} + +static int do_show(int argc, char **argv) +{ + struct netdev_qstats_get_list *qstats; + struct netdev_qstats_get_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret = 0; + + /* Parse options */ + while (argc > 0) { + if (is_prefix(*argv, "scope") || is_prefix(*argv, "group-by")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + return -1; + + if (is_prefix(*argv, "queue")) { + scope = NETDEV_QSTATS_SCOPE_QUEUE; + } else if (is_prefix(*argv, "device")) { + scope = 0; + } else { + p_err("invalid scope value '%s'", *argv); + return -1; + } + NEXT_ARG(); + } else { + p_err("unknown option '%s'", *argv); + return -1; + } + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + req = netdev_qstats_get_req_alloc(); + if (!req) { + p_err("failed to allocate qstats request"); + ret = -1; + goto exit_close; + } + + if (scope) + netdev_qstats_get_req_set_scope(req, scope); + + qstats = netdev_qstats_get_dump(ys, req); + netdev_qstats_get_req_free(req); + if (!qstats) { + p_err("failed to get queue stats: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + /* Print the stats as returned by the kernel */ + if (json_output) + print_json_qstats(qstats); + else + print_plain_qstats(qstats); + + netdev_qstats_get_list_free(qstats); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + +static void compute_stats(__u64 *values, unsigned int count, + double *mean, double *stddev, __u64 *min, __u64 *max) +{ + double sum = 0.0, variance = 0.0; + unsigned int i; + + *min = ~0ULL; + *max = 0; + + if (count == 0) { + *mean = 0; + *stddev = 0; + *min = 0; + return; + } + + for (i = 0; i < count; i++) { + sum += values[i]; + if (values[i] < *min) + *min = values[i]; + if (values[i] > *max) + *max = values[i]; + } + + *mean = sum / count; + + if (count > 1) { + for (i = 0; i < count; i++) { + double diff = values[i] - *mean; + + variance += diff * diff; + } + *stddev = sqrt(variance / (count - 1)); + } else { + *stddev = 0; + } +} + +static void print_balance_stats(const char *name, enum netdev_queue_type type, + __u64 *values, unsigned int count) 
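/* Worked example of the balance metrics printed below, using made-up
 * per-queue counters: rx-packets = {100, 110, 90, 100} across four RX
 * queues gives
 *   mean = 100, sample stddev = sqrt((0 + 100 + 100 + 0) / 3) ~= 8.2
 *   cv = stddev / mean * 100 ~= 8.2%
 *   ns = 2 * (max - min) / (max + min) * 100 = 2 * 20 / 200 * 100 = 20%
 * Perfectly balanced queues give cv = ns = 0; one hot queue drives both up.
 */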
+{ + double mean, stddev, cv, ns; + __u64 min, max; + + if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) || + (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX)) + return; + + compute_stats(values, count, &mean, &stddev, &min, &max); + + cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0; + ns = min + max > 0 ? (double)2 * (max - min) / (max + min) * 100 : 0.0; + + printf(" %-12s: cv=%.1f%% ns=%.1f%% stddev=%.0f\n", + name, cv, ns, stddev); + printf(" %-12s min=%llu max=%llu mean=%.0f\n", + "", min, max, mean); +} + +static void +print_balance_stats_json(const char *name, enum netdev_queue_type type, + __u64 *values, unsigned int count) +{ + double mean, stddev, cv, ns; + __u64 min, max; + + if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) || + (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX)) + return; + + compute_stats(values, count, &mean, &stddev, &min, &max); + + cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0; + ns = min + max > 0 ? (double)2 * (max - min) / (max + min) * 100 : 0.0; + + jsonw_name(json_wtr, name); + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "queue-count", count); + jsonw_uint_field(json_wtr, "min", min); + jsonw_uint_field(json_wtr, "max", max); + jsonw_float_field(json_wtr, "mean", mean); + jsonw_float_field(json_wtr, "stddev", stddev); + jsonw_float_field(json_wtr, "coefficient-of-variation", cv); + jsonw_float_field(json_wtr, "normalized-spread", ns); + jsonw_end_object(json_wtr); +} + +static int cmp_ifindex_type(const void *a, const void *b) +{ + const struct netdev_qstats_get_rsp *qa = a; + const struct netdev_qstats_get_rsp *qb = b; + + if (qa->ifindex != qb->ifindex) + return qa->ifindex - qb->ifindex; + if (qa->queue_type != qb->queue_type) + return qa->queue_type - qb->queue_type; + return qa->queue_id - qb->queue_id; +} + +static int do_balance(int argc, char **argv __attribute__((unused))) +{ + struct netdev_qstats_get_list *qstats; + struct netdev_qstats_get_req *req; + struct netdev_qstats_get_rsp **sorted; + struct ynl_error yerr; + struct ynl_sock *ys; + unsigned int count = 0; + unsigned int i, j; + int ret = 0; + + if (argc > 0) { + p_err("balance command takes no arguments"); + return -1; + } + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return -1; + } + + req = netdev_qstats_get_req_alloc(); + if (!req) { + p_err("failed to allocate qstats request"); + ret = -1; + goto exit_close; + } + + /* Always use queue scope for balance analysis */ + netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE); + + qstats = netdev_qstats_get_dump(ys, req); + netdev_qstats_get_req_free(req); + if (!qstats) { + p_err("failed to get queue stats: %s", ys->err.msg); + ret = -1; + goto exit_close; + } + + /* Count and sort queues */ + ynl_dump_foreach(qstats, qs) + count++; + + if (count == 0) { + if (json_output) + jsonw_start_array(json_wtr); + else + printf("No queue statistics available\n"); + goto exit_free_qstats; + } + + sorted = calloc(count, sizeof(*sorted)); + if (!sorted) { + p_err("failed to allocate sorted array"); + ret = -1; + goto exit_free_qstats; + } + + i = 0; + ynl_dump_foreach(qstats, qs) + sorted[i++] = qs; + + qsort(sorted, count, sizeof(*sorted), cmp_ifindex_type); + + if (json_output) + jsonw_start_array(json_wtr); + + /* Process each device/queue-type combination */ + i = 0; + while (i < count) { + __u64 *rx_packets, *rx_bytes, *tx_packets, *tx_bytes; + enum netdev_queue_type type = sorted[i]->queue_type; + unsigned int ifindex = sorted[i]->ifindex; + unsigned int 
queue_count = 0; + char ifname[IF_NAMESIZE]; + const char *name; + + /* Count queues for this device/type */ + for (j = i; j < count && sorted[j]->ifindex == ifindex && + sorted[j]->queue_type == type; j++) + queue_count++; + + /* Skip if no packets/bytes (inactive queues) */ + if (!sorted[i]->_present.rx_packets && + !sorted[i]->_present.rx_bytes && + !sorted[i]->_present.tx_packets && + !sorted[i]->_present.tx_bytes) + goto next_ifc; + + /* Allocate arrays for statistics */ + rx_packets = calloc(queue_count, sizeof(*rx_packets)); + rx_bytes = calloc(queue_count, sizeof(*rx_bytes)); + tx_packets = calloc(queue_count, sizeof(*tx_packets)); + tx_bytes = calloc(queue_count, sizeof(*tx_bytes)); + + if (!rx_packets || !rx_bytes || !tx_packets || !tx_bytes) { + p_err("failed to allocate statistics arrays"); + free(rx_packets); + free(rx_bytes); + free(tx_packets); + free(tx_bytes); + ret = -1; + goto exit_free_sorted; + } + + /* Collect statistics */ + for (j = 0; j < queue_count; j++) { + rx_packets[j] = sorted[i + j]->_present.rx_packets ? + sorted[i + j]->rx_packets : 0; + rx_bytes[j] = sorted[i + j]->_present.rx_bytes ? + sorted[i + j]->rx_bytes : 0; + tx_packets[j] = sorted[i + j]->_present.tx_packets ? + sorted[i + j]->tx_packets : 0; + tx_bytes[j] = sorted[i + j]->_present.tx_bytes ? + sorted[i + j]->tx_bytes : 0; + } + + name = if_indextoname(ifindex, ifname); + + if (json_output) { + jsonw_start_object(json_wtr); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_uint_field(json_wtr, "ifindex", ifindex); + jsonw_string_field(json_wtr, "queue-type", + netdev_queue_type_str(type)); + + print_balance_stats_json("rx-packets", type, + rx_packets, queue_count); + print_balance_stats_json("rx-bytes", type, + rx_bytes, queue_count); + print_balance_stats_json("tx-packets", type, + tx_packets, queue_count); + print_balance_stats_json("tx-bytes", type, + tx_bytes, queue_count); + + jsonw_end_object(json_wtr); + } else { + if (name) + printf("%s", name); + else + printf("ifindex:%u", ifindex); + printf(" %s %d queues:\n", + netdev_queue_type_str(type), queue_count); + + print_balance_stats("rx-packets", type, + rx_packets, queue_count); + print_balance_stats("rx-bytes", type, + rx_bytes, queue_count); + print_balance_stats("tx-packets", type, + tx_packets, queue_count); + print_balance_stats("tx-bytes", type, + tx_bytes, queue_count); + printf("\n"); + } + + free(rx_packets); + free(rx_bytes); + free(tx_packets); + free(tx_bytes); + +next_ifc: + i += queue_count; + } + + if (json_output) + jsonw_end_array(json_wtr); + +exit_free_sorted: + free(sorted); +exit_free_qstats: + netdev_qstats_get_list_free(qstats); +exit_close: + ynl_sock_destroy(ys); + return ret; +} + +static int do_help(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s qstats { COMMAND | help }\n" + " %s qstats [ show ] [ OPTIONS ]\n" + " %s qstats balance\n" + "\n" + " OPTIONS := { scope queue | group-by { device | queue } }\n" + "\n" + " show - Display queue statistics (default)\n" + " Statistics are aggregated for the entire device.\n" + " show scope queue - Display per-queue statistics\n" + " show group-by device - Display device-aggregated statistics (default)\n" + " show group-by queue - Display per-queue statistics\n" + " balance - Analyze traffic distribution balance.\n" + "", + bin_name, bin_name, bin_name); + + return 0; +} + +static const struct cmd qstats_cmds[] = { + { "show", do_show 
}, + { "balance", do_balance }, + { "help", do_help }, + { 0 } +}; + +int do_qstats(int argc, char **argv) +{ + return cmd_select(qstats_cmds, argc, argv, do_help); +} diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 3b262487ec06..77d7c59f5d8b 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -34,10 +34,7 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - union { - char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ - DECLARE_FLEX_ARRAY(char, sa_data); - }; + char sa_data[14]; /* 14 bytes of protocol address */ }; struct linger { diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f2a2fd236ca8..558839e3c185 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -126,3 +126,8 @@ CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y +CONFIG_INFINIBAND=y +CONFIG_SMC=y +CONFIG_SMC_HS_CTRL_BPF=y +CONFIG_DIBS=y +CONFIG_DIBS_LO=y
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..e4940583924b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "sk_bypass_prot_mem.skel.h" +#include "network_helpers.h" + +#define NR_PAGES 32 +#define NR_SOCKETS 2 +#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS) +#define BUF_SINGLE 1024 +#define NR_SEND (BUF_TOTAL / BUF_SINGLE) + +struct test_case { + char name[8]; + int family; + int type; + int (*create_sockets)(struct test_case *test_case, int sk[], int len); + long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel); +}; + +static int tcp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int server, i, err = 0; + + server = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (!ASSERT_GE(server, 0, "start_server_str")) + return server; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = connect_to_fd(server, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "connect_to_fd"); + err = sk[i]; + break; + } + + sk[i + 1] = accept(server, NULL, NULL); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "accept"); + err = sk[i + 1]; + break; + } + } + + close(server); + + return err; +} + +static int udp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int i, j, err, rcvbuf = BUF_TOTAL; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "start_server"); + return sk[i]; + } + + sk[i + 1] = connect_to_fd(sk[i], 0); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "connect_to_fd"); + return sk[i + 1]; + } + + err = connect_fd_to_fd(sk[i], sk[i + 1], 0); + if (err) { + ASSERT_EQ(err, 0, "connect_fd_to_fd"); + return err; + } + + for (j = 0; j < 2; j++) { + err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int)); + if (err) { + ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"); + return err; + } + } + } + + return 0; +} + +static long get_memory_allocated(struct test_case *test_case, + bool *activated, long *memory_allocated) +{ + int sk; + + *activated = true; + + /* AF_INET and AF_INET6 share the same memory_allocated. + * tcp_init_sock() is called by AF_INET and AF_INET6, + * but udp_lib_init_sock() is inline. 
+ */ + sk = socket(AF_INET, test_case->type, 0); + if (!ASSERT_GE(sk, 0, "get_memory_allocated")) + return -1; + + close(sk); + + return *memory_allocated; +} + +static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->tcp_activated, + &skel->bss->tcp_memory_allocated); +} + +static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->udp_activated, + &skel->bss->udp_memory_allocated); +} + +static int check_bypass(struct test_case *test_case, + struct sk_bypass_prot_mem *skel, bool bypass) +{ + char buf[BUF_SINGLE] = {}; + long memory_allocated[2]; + int sk[NR_SOCKETS]; + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sk); i++) + sk[i] = -1; + + err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk)); + if (err) + goto close; + + memory_allocated[0] = test_case->get_memory_allocated(test_case, skel); + + /* allocate pages >= NR_PAGES */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = send(sk[i], buf, sizeof(buf), 0); + + /* Avoid too noisy logs when something failed. */ + if (bytes != sizeof(buf)) { + ASSERT_EQ(bytes, sizeof(buf), "send"); + if (bytes < 0) { + err = bytes; + goto drain; + } + } + } + } + + memory_allocated[1] = test_case->get_memory_allocated(test_case, skel); + + if (bypass) + ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass"); + else + ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass"); + +drain: + if (test_case->type == SOCK_DGRAM) { + /* UDP starts purging sk->sk_receive_queue after one RCU + * grace period, then udp_memory_allocated goes down, + * so drain the queue before close(). 
+ */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC); + + if (bytes == sizeof(buf)) + continue; + if (bytes != -1 || errno != EAGAIN) + PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno)); + break; + } + } + } + +close: + for (i = 0; i < ARRAY_SIZE(sk); i++) { + if (sk[i] < 0) + break; + + close(sk[i]); + } + + return err; +} + +static void run_test(struct test_case *test_case) +{ + struct sk_bypass_prot_mem *skel; + struct nstoken *nstoken; + int cgroup, err; + + skel = sk_bypass_prot_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + err = sk_bypass_prot_mem__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto destroy_skel; + + cgroup = test__join_cgroup("/sk_bypass_prot_mem"); + if (!ASSERT_GE(cgroup, 0, "join_cgroup")) + goto destroy_skel; + + err = make_netns("sk_bypass_prot_mem"); + if (!ASSERT_EQ(err, 0, "make_netns")) + goto close_cgroup; + + nstoken = open_netns("sk_bypass_prot_mem"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto remove_netns; + + err = check_bypass(test_case, skel, false); + if (!ASSERT_EQ(err, 0, "test_bypass(false)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1"); + if (!ASSERT_EQ(err, 0, "write_sysctl(1)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0"); + if (!ASSERT_EQ(err, 0, "write_sysctl(0)")) + goto close_netns; + + skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup); + if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + ASSERT_EQ(err, 0, "test_bypass(true by bpf)"); + +close_netns: + close_netns(nstoken); +remove_netns: + remove_netns("sk_bypass_prot_mem"); +close_cgroup: + close(cgroup); +destroy_skel: + sk_bypass_prot_mem__destroy(skel); +} + +static struct test_case test_cases[] = { + { + .name = "TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDP ", + .family = AF_INET, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, + { + .name = "TCPv6", + .family = AF_INET6, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDPv6", + .family = AF_INET6, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, +}; + +void serial_test_sk_bypass_prot_mem(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (test__start_subtest(test_cases[i].name)) + run_test(&test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c new file mode 100644 index 000000000000..de22734abc4d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <linux/genetlink.h> +#include "network_helpers.h" +#include "bpf_smc.skel.h" + +#ifndef IPPROTO_SMC +#define IPPROTO_SMC 256 +#endif + +#define CLIENT_IP "127.0.0.1" +#define SERVER_IP "127.0.1.0" 
+#define SERVER_IP_VIA_RISK_PATH "127.0.2.0" + +#define SERVICE_1 80 +#define SERVICE_2 443 +#define SERVICE_3 8443 + +#define TEST_NS "bpf_smc_netns" + +static struct netns_obj *test_netns; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +#if defined(__s390x__) +/* s390x has default seid */ +static bool setup_ueid(void) { return true; } +static void cleanup_ueid(void) {} +#else +enum { + SMC_NETLINK_ADD_UEID = 10, + SMC_NETLINK_REMOVE_UEID +}; + +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ +}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[1024]; +}; + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" +#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" + +static uint16_t smc_nl_family_id = -1; + +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg = {0}; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = nlmsg_flags; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 1; + na = (struct nlattr *)GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static bool get_smc_nl_family_id(void) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlattr *nl; + int fd, ret; + pid_t pid; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "nl_family socket")) + return false; + + pid = getpid(); + + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "nl_family bind")) + goto fail; + + ret = send_cmd(fd, GENL_ID_CTRL, pid, + NLM_F_REQUEST, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, + strlen(SMC_GENL_FAMILY_NAME)); + if (!ASSERT_OK(ret, "nl_family query")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 || + !NLMSG_OK(&msg.n, ret), "nl_family response")) + goto fail; + + nl = (struct nlattr *)GENLMSG_DATA(&msg); + nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len)); + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) + goto fail; + + smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool smc_ueid(int op) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlmsgerr *err; + char test_ueid[32]; + int fd, ret; + pid_t pid; + + /* UEID required */ + memset(test_ueid, '\x20', sizeof(test_ueid)); + memcpy(test_ueid, 
SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "ueid socket")) + return false; + + pid = getpid(); + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "ueid bind")) + goto fail; + + ret = send_cmd(fd, smc_nl_family_id, pid, + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, + (void *)test_ueid, sizeof(test_ueid)); + if (!ASSERT_OK(ret, "ueid cmd")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(ret < 0 || + !NLMSG_OK(&msg.n, ret), "ueid response")) + goto fail; + + if (msg.n.nlmsg_type == NLMSG_ERROR) { + err = NLMSG_DATA(&msg); + switch (op) { + case SMC_NETLINK_REMOVE_UEID: + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), + "ueid remove")) + goto fail; + break; + case SMC_NETLINK_ADD_UEID: + if (!ASSERT_OK(err->error, "ueid add")) + goto fail; + break; + default: + break; + } + } + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool setup_ueid(void) +{ + /* get smc nl id */ + if (!get_smc_nl_family_id()) + return false; + /* clear old ueid for bpftest */ + smc_ueid(SMC_NETLINK_REMOVE_UEID); + /* smc-loopback required ueid */ + return smc_ueid(SMC_NETLINK_ADD_UEID); +} + +static void cleanup_ueid(void) +{ + smc_ueid(SMC_NETLINK_REMOVE_UEID); +} +#endif /* __s390x__ */ + +static bool setup_netns(void) +{ + test_netns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(test_netns, "open net namespace")) + goto fail_netns; + + SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); + SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); + + return true; +fail_ip: + netns_free(test_netns); +fail_netns: + return false; +} + +static void cleanup_netns(void) +{ + netns_free(test_netns); +} + +static bool setup_smc(void) +{ + if (!setup_ueid()) + return false; + + if (!setup_netns()) + goto fail_netns; + + return true; +fail_netns: + cleanup_ueid(); + return false; +} + +static int set_client_addr_cb(int fd, void *opts) +{ + const char *src = (const char *)opts; + struct sockaddr_in localaddr; + + localaddr.sin_family = AF_INET; + localaddr.sin_port = htons(0); + localaddr.sin_addr.s_addr = inet_addr(src); + return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); +} + +static void run_link(const char *src, const char *dst, int port) +{ + struct network_helper_opts opts = {0}; + int server, client; + + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); + if (!ASSERT_OK_FD(server, "start service_1")) + return; + + opts.proto = IPPROTO_TCP; + opts.post_socket_cb = set_client_addr_cb; + opts.cb_opts = (void *)src; + + client = connect_to_fd_opts(server, &opts); + if (!ASSERT_OK_FD(client, "start connect")) + goto fail_client; + + close(client); +fail_client: + close(server); +} + +static void block_link(int map_fd, const char *src, const char *dst) +{ + struct smc_policy_ip_value val = { .mode = /* block */ 0 }; + struct smc_policy_ip_key key = { + .sip = inet_addr(src), + .dip = inet_addr(dst), + }; + + bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); +} + +/* + * This test describes a real-life service topology as follows: + * + * +-------------> service_1 + * link 1 | | + * +--------------------> server | link 2 + * | | V + * | +-------------> service_2 + * | link 3 + * client -------------------> server_via_unsafe_path -> service_3 + * + * Among them, + * 1. link-1 is very suitable for using SMC. + * 2. 
link-2 is not suitable for using SMC, because the traffic on this link is + * mostly short-lived connections. + * 3. link-3 is also not suitable for using SMC, because the RDMA link is + * unavailable and needs to go through a long timeout before it can fall back + * to TCP. + * To achieve this goal, we use a customized SMC IP strategy via smc_hs_ctrl. + */ +static void test_topo(void) +{ + struct bpf_smc *skel; + int rc, map_fd; + + skel = bpf_smc__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load")) + return; + + rc = bpf_smc__attach(skel); + if (!ASSERT_OK(rc, "bpf_smc__attach")) + goto fail; + + map_fd = bpf_map__fd(skel->maps.smc_policy_ip); + if (!ASSERT_OK_FD(map_fd, "bpf_map__fd")) + goto fail; + + /* Mock the process of transparent replacement: once hs_ctrl is set, + * the protocol is rewritten to IPPROTO_SMC via + * fmod_ret/update_socket_protocol. + */ + write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck"); + + /* Configure the IP policy */ + block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH); + block_link(map_fd, SERVER_IP, SERVER_IP); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_1); + /* should go with smc fallback */ + run_link(SERVER_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc */ + run_link(CLIENT_IP, SERVER_IP, SERVICE_2); + + ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count"); + + /* should go with smc fallback */ + run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3); + + ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count"); + ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count"); + +fail: + bpf_smc__destroy(skel); +} + +void test_bpf_smc(void) +{ + if (!setup_smc()) { + printf("setup for smc test failed, test SKIP\n"); + test__skip(); + return; + } + + if (test__start_subtest("topo")) + test_topo(); + + cleanup_ueid(); + cleanup_netns(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 178292d1251a..ee94c281888a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -124,10 +124,10 @@ static int send_test_packet(int ifindex) int n, sock = -1; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + /* We use the Ethernet header only to identify the test packet */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + }; memcpy(packet, &eth, sizeof(eth)); memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); @@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd) __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int n; - /* The ethernet header doesn't need to be valid for this test */ - memset(packet, 0, sizeof(struct ethhdr)); + /* The Ethernet header is mostly not relevant. We use it to identify the + * test packet, and some BPF helpers we exercise expect to operate on + * Ethernet frames carrying IP packets. Pretend that's the case.
+ */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + .h_proto = htons(ETH_P_IP), + }; + + memcpy(packet, &eth, sizeof(eth)); memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); n = write(tap_fd, packet, sizeof(packet)); @@ -171,31 +179,19 @@ static int write_test_packet(int tap_fd) return 0; } -static void assert_test_result(const struct bpf_map *result_map) -{ - int err; - __u32 map_key = 0; - __u8 map_value[TEST_PAYLOAD_LEN]; - - err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), - &map_value, TEST_PAYLOAD_LEN, BPF_ANY); - if (!ASSERT_OK(err, "lookup test_result")) - return; - - ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, - "test_result map contains test payload"); -} - -static bool clear_test_result(struct bpf_map *result_map) +static void dump_err_stream(const struct bpf_program *prog) { - const __u8 v[sizeof(test_payload)] = {}; - const __u32 k = 0; - int err; - - err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); - ASSERT_OK(err, "update test_result"); + char buf[512]; + int ret; - return err == 0; + ret = 0; + do { + ret = bpf_prog_stream_read(bpf_program__fd(prog), + BPF_STREAM_STDERR, buf, sizeof(buf), + NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, stderr); + } while (ret > 0); } void test_xdp_context_veth(void) @@ -270,11 +266,14 @@ void test_xdp_context_veth(void) if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) goto close; + skel->bss->test_pass = false; + ret = send_test_packet(tx_ifindex); if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel->maps.test_result); + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(tc_prog); close: close_netns(nstoken); @@ -286,7 +285,7 @@ close: static void test_tuntap(struct bpf_program *xdp_prog, struct bpf_program *tc_prio_1_prog, struct bpf_program *tc_prio_2_prog, - struct bpf_map *result_map) + bool *test_pass) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -295,8 +294,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; - if (!clear_test_result(result_map)) - return; + *test_pass = false; ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) @@ -340,7 +338,8 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - assert_test_result(result_map); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prio_2_prog ?
: tc_prio_1_prog); close: if (tap_fd >= 0) @@ -411,7 +410,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prog); close: if (tap_fd >= 0) @@ -431,61 +431,82 @@ void test_xdp_context_tuntap(void) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_read")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_write")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_write, skel->progs.ing_cls_dynptr_read, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice_rdwr")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_offset_wr, skel->progs.ing_cls_dynptr_offset_rd, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset_oob")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, - skel->maps.test_result); - if (test__start_subtest("clone_data_meta_empty_on_data_write")) + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_survives_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_data_write, + skel->progs.clone_data_meta_survives_data_write, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + if (test__start_subtest("clone_data_meta_survives_meta_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_meta_write, + skel->progs.clone_data_meta_survives_meta_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_data_slice_write, + skel->progs.clone_meta_dynptr_survives_data_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_meta_slice_write, + skel->progs.clone_meta_dynptr_survives_meta_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + 
skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, &skel->bss->test_pass); + /* Tests for BPF helpers which touch headroom */ + if (test__start_subtest("helper_skb_vlan_push_pop")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_vlan_push_pop, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_adjust_room")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_adjust_room, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_head_tail")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_head_tail, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_proto")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_proto, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 164640db3a29..b1e509b231cd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 591c703f5032..dbc7166aee91 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c new file mode 100644 index 000000000000..70d8b08f5914 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_smc.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +enum { + BPF_SMC_LISTEN = 10, +}; + +struct smc_sock___local { + struct sock sk; + struct smc_sock *listen_smc; + bool use_fallback; +} 
__attribute__((preserve_access_index)); + +int smc_cnt = 0; +int fallback_cnt = 0; + +SEC("fentry/smc_release") +int BPF_PROG(bpf_smc_release, struct socket *sock) +{ + /* only count from one side (client) */ + if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN) + return 0; + smc_cnt++; + return 0; +} + +SEC("fentry/smc_switch_to_fallback") +int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc) +{ + /* only count from one side (client) */ + if (smc && !smc->listen_smc) + fallback_cnt++; + return 0; +} + +/* go with default value if no strat was found */ +bool default_ip_strat_value = true; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct smc_policy_ip_key)); + __uint(value_size, sizeof(struct smc_policy_ip_value)); + __uint(max_entries, 128); + __uint(map_flags, BPF_F_NO_PREALLOC); +} smc_policy_ip SEC(".maps"); + +static bool smc_check(__u32 src, __u32 dst) +{ + struct smc_policy_ip_value *value; + struct smc_policy_ip_key key = { + .sip = src, + .dip = dst, + }; + + value = bpf_map_lookup_elem(&smc_policy_ip, &key); + return value ? value->mode : default_ip_strat_value; +} + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(smc_run, int family, int type, int protocol) +{ + struct task_struct *task; + + if (family != AF_INET && family != AF_INET6) + return protocol; + + if ((type & 0xf) != SOCK_STREAM) + return protocol; + + if (protocol != 0 && protocol != IPPROTO_TCP) + return protocol; + + task = bpf_get_current_task_btf(); + /* Prevent from affecting other tests */ + if (!task || !task->nsproxy->net_ns->smc.hs_ctrl) + return protocol; + + return IPPROTO_SMC; +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return smc_check(ireq->req.__req_common.skc_daddr, + ireq->req.__req_common.skc_rcv_saddr); +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp) +{ + return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr, + tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr); +} + +SEC(".struct_ops") +struct smc_hs_ctrl linkcheck = { + .name = "linkcheck", + .syn_option = (void *)bpf_smc_set_tcp_option, + .synack_option = (void *)bpf_smc_set_tcp_option_cond, +}; diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..09a00d11ffcc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +extern int tcp_memory_per_cpu_fw_alloc __ksym; +extern int udp_memory_per_cpu_fw_alloc __ksym; + +int nr_cpus; +bool tcp_activated, udp_activated; +long tcp_memory_allocated, udp_memory_allocated; + +struct sk_prot { + long *memory_allocated; + int *memory_per_cpu_fw_alloc; +}; + +static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx) +{ + int *memory_per_cpu_fw_alloc; + + memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i); + if (memory_per_cpu_fw_alloc) + *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc; + + return 0; +} + +static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc) +{ + struct sock *sk = 
bpf_core_cast(_sk, struct sock); + struct sk_prot sk_prot_ctx; + long memory_allocated; + + /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */ + memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter; + + sk_prot_ctx.memory_allocated = &memory_allocated; + sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc; + + bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0); + + return memory_allocated; +} + +static void fentry_init_sock(struct sock *sk, bool *activated, + long *memory_allocated, int *memory_per_cpu_fw_alloc) +{ + if (!*activated) + return; + + *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc); + *activated = false; +} + +SEC("fentry/tcp_init_sock") +int BPF_PROG(fentry_tcp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &tcp_activated, + &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("fentry/udp_init_sock") +int BPF_PROG(fentry_udp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &udp_activated, + &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("cgroup/sock_create") +int sock_create(struct bpf_sock *ctx) +{ + int err, val = 1; + + err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + val = 0; + + err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + if (val != 1) { + err = -EINVAL; + goto err; + } + + return 1; + +err: + bpf_set_retval(err); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index d79cb74b571e..0a0f371a2dec 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,6 +4,7 @@ #include <linux/if_ether.h> #include <linux/pkt_cls.h> +#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> #include "bpf_kfuncs.h" @@ -11,37 +12,72 @@ #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -/* Demonstrates how metadata can be passed from an XDP program to a TC program - * using bpf_xdp_adjust_meta. - * For the sake of testing the metadata support in drivers, the XDP program uses - * a fixed-size payload after the Ethernet header as metadata. The TC program - * copies the metadata it receives into a map so it can be checked from - * userspace. +/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta. + * + * The XDP program extracts a fixed-size payload following the Ethernet header + * and stores it as packet metadata to test the driver's metadata support. The + * TC program then verifies if the passed metadata is correct. 
*/ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __uint(value_size, META_SIZE); -} test_result SEC(".maps"); - bool test_pass; +static const __u8 smac_want[ETH_ALEN] = { + 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF, +}; + +static const __u8 meta_want[META_SIZE] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; + +static bool check_smac(const struct ethhdr *eth) +{ + return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN); +} + +static bool check_metadata(const char *file, int line, __u8 *meta_have) +{ + if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return true; + + bpf_stream_printk(BPF_STREAM_STDERR, + "FAIL:%s:%d: metadata mismatch\n" + " have:\n %pI6\n %pI6\n" + " want:\n %pI6\n %pI6\n", + file, line, + &meta_have[0x00], &meta_have[0x10], + &meta_want[0x00], &meta_want[0x10]); + return false; +} + +#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) + +static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb) +{ + __u8 *data_meta = ctx_ptr(skb, data_meta); + __u8 *data = ctx_ptr(skb, data); + + return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta); +} + +#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta; - __u32 key = 0; - - data_meta = ctx_ptr(ctx, data_meta); - data = ctx_ptr(ctx, data); + __u8 *meta_have = ctx_ptr(ctx, data_meta); + __u8 *data = ctx_ptr(ctx, data); - if (data_meta + META_SIZE > data) - return TC_ACT_SHOT; + if (meta_have + META_SIZE > data) + goto out; - bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); + if (!check_metadata(meta_have)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -49,17 +85,17 @@ int ing_cls(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_read(struct __sk_buff *ctx) { + __u8 meta_have[META_SIZE]; struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } @@ -86,20 +122,18 @@ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) { struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst, *src; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + __u8 *meta_have; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); - if (!src) - return TC_ACT_SHOT; + meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!meta_have) + goto out; - __builtin_memcpy(dst, src, META_SIZE); + if (!check_metadata(meta_have)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -129,14 +163,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) { - struct bpf_dynptr meta; const __u32 chunk_len = META_SIZE / 4; - const __u32 zero = 0; + __u8 meta_have[META_SIZE]; + struct bpf_dynptr meta; __u8 *dst, *src; - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + dst = meta_have; /* 1. 
Regular read */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -155,9 +187,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) /* 4. Read from a slice starting at an offset */ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); if (!src) - return TC_ACT_SHOT; + goto out; __builtin_memcpy(dst, src, chunk_len); + if (!check_metadata(meta_have)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } @@ -254,7 +291,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx) /* Drop any non-test packets */ if (eth + 1 > ctx_ptr(ctx, data_end)) return XDP_DROP; - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -294,9 +331,9 @@ int ing_xdp(struct xdp_md *ctx) /* The Linux networking stack may send other packets on the test * interface that interfere with the test. Just drop them. - * The test packets can be recognized by their ethertype of zero. + * The test packets can be recognized by their source MAC address. */ - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; __builtin_memcpy(data_meta, payload, META_SIZE); @@ -304,22 +341,25 @@ int ing_xdp(struct xdp_md *ctx) } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _payload_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _payload_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +int clone_data_meta_survives_data_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) + goto out; + + if (meta_have + META_SIZE > eth) goto out; - /* Expect no metadata */ - if (ctx->data_meta != ctx->data) + if (!check_metadata(meta_have)) goto out; /* Packet write to trigger unclone in prologue */ @@ -331,40 +371,44 @@ out: } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _metadata_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _metadata_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); - __u8 *md = ctx_ptr(ctx, data_meta); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - if (md + 1 > ctx_ptr(ctx, data)) { - /* Expect no metadata */ - test_pass = true; - } else { - /* Metadata write to trigger unclone in prologue */ - *md = 42; - } + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) + goto out; + + /* Metadata write to trigger unclone in prologue */ + *meta_have = 42; + + test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _payload_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates a r/w slice to packet _payload_. 
*/ SEC("tc") -int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; struct ethhdr *eth; bpf_dynptr_from_skb(ctx, 0, &data); @@ -372,51 +416,45 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) goto out; - /* Packet write to trigger unclone in prologue */ - eth->h_proto = 42; - test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _metadata_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates an r/w slice to packet _metadata_. */ SEC("tc") -int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; const struct ethhdr *eth; - __u8 *md; + __u8 *meta_have; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!meta_have) goto out; - /* Metadata write to trigger unclone in prologue */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); - if (md) - *md = 42; + if (!check_metadata(meta_have)) + goto out; test_pass = true; out: @@ -424,34 +462,40 @@ out: } /* - * Check that skb_meta dynptr is read-only before prog writes to packet payload - * using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _payload_ using dynptr_write helper and metadata + * remains intact before and after the write. 
*/ SEC("tc") -int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect read-only metadata before unclone */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) + goto out; + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; /* Helper write to payload will unclone the packet */ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); - /* Expect no metadata after unclone */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; @@ -460,31 +504,165 @@ out: } /* - * Check that skb_meta dynptr is read-only if prog writes to packet - * metadata using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _metadata_ using dynptr_write helper and + * metadata remains intact before and after the write. */ SEC("tc") -int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect read-only metadata */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) goto out; - /* Metadata write. Expect failure. */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + /* Helper write to metadata will unclone the packet */ + bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0); + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_vlan_push_pop(struct __sk_buff *ctx) +{ + int err; + + /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only + * secondary tag push triggers an actual MAC header modification. 
+ */ + err = bpf_skb_vlan_push(ctx, 0, 42); + if (err) + goto out; + err = bpf_skb_vlan_push(ctx, 0, 207); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_adjust_room(struct __sk_buff *ctx) +{ + int err; + + /* Grow a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Shrink a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Grow a 256 byte hole to trigger head reallocation */ + err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_change_head_tail(struct __sk_buff *ctx) +{ + int err; + + /* Reserve 1 extra in the front for packet data */ + err = bpf_skb_change_head(ctx, 1, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 256 extra bytes in the front to trigger head reallocation */ + err = bpf_skb_change_head(ctx, 256, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 4k extra bytes in the back to trigger head reallocation */ + err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_change_proto(struct __sk_buff *ctx) +{ + int err; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) goto out; test_pass = true; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index ed0a4721d8fd..1669a7eeda26 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -926,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) goto out; } - err = kernel_connect(sock, (struct sockaddr *)&args->addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen, 0); out: mutex_unlock(&sock_lock); @@ -949,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) goto out; } - err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); + err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen); out: mutex_unlock(&sock_lock); diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index 585ecb4d5dc4..3633c7a3ed65 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only +gro napi_id_helper psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 71ee69e524d7..f5c71d993750 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -6,10 
+6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ ../../net/lib.sh \ TEST_GEN_FILES := \ + gro \ napi_id_helper \ # end of TEST_GEN_FILES TEST_PROGS := \ + gro.py \ hds.py \ napi_id.py \ napi_threaded.py \ @@ -23,6 +25,7 @@ TEST_PROGS := \ ping.py \ psp.py \ queues.py \ + ring_reconfig.py \ shaper.py \ stats.py \ xdp.py \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh index c4711272fe45..559f300f965a 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -30,6 +30,7 @@ check_connection() local message=${3} RET=0 + sleep 0.25 ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null check_err $? "ping failed" log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/drivers/net/gro.c index cfc39f70635d..995b492f5bcb 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/drivers/net/gro.c @@ -57,7 +57,8 @@ #include <string.h> #include <unistd.h> -#include "../kselftest.h" +#include "../../kselftest.h" +#include "../../net/lib/ksft.h" #define DPORT 8000 #define SPORT 1500 @@ -1127,6 +1128,8 @@ static void gro_receiver(void) set_timeout(rxfd); bind_packetsocket(rxfd); + ksft_ready(); + memset(correct_payload, 0, sizeof(correct_payload)); if (strcmp(testname, "data") == 0) { diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py new file mode 100755 index 000000000000..ba83713bf7b5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/gro.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +GRO (Generic Receive Offload) conformance tests. + +Validates that GRO coalescing works correctly by running the gro +binary in different configurations and checking for correct packet +coalescing behavior. + +Test cases: + - data: Data packets with same size/headers and correct seq numbers coalesce + - ack: Pure ACK packets do not coalesce + - flags: Packets with PSH, SYN, URG, RST flags do not coalesce + - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce + - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce + - large: Packets larger than GRO_MAX_SIZE don't coalesce +""" + +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, KsftXfailEx +from lib.py import cmd, defer, bkg, ip +from lib.py import ksft_variants + + +def _resolve_dmac(cfg, ipver): + """ + Find the destination MAC address remote host should use to send packets + towards the local host. It may be a router / gateway address. 
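Aside: the next-hop MAC resolution described in the docstring just above (and implemented right below with the ksft ip() helper) can be sketched as a standalone script using iproute2's JSON output. This is a rough illustration, not code from the patch; the helper name is made up, and it assumes the gateway's neighbour entry is already resolved, which the test ensures by pinging the gateway first.

    import json
    import subprocess

    def resolve_next_hop_mac(dst_ip, dev, ipver="4"):
        # Ask the kernel how dst_ip would be reached from this host.
        route = json.loads(subprocess.check_output(
            ["ip", f"-{ipver}", "-j", "route", "get", dst_ip]))[0]
        gw = route.get("gateway")
        if gw is None:
            # On-link destination: frames go straight to the target NIC, so
            # its own MAC (e.g. from "ip -d -j link show") is the answer.
            return None
        # Off-link: the gateway's resolved neighbour entry holds the MAC.
        neigh = json.loads(subprocess.check_output(
            ["ip", "-j", "neigh", "get", gw, "dev", dev]))[0]
        return neigh["lladdr"]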
+ """ + + attr = "dmac" + ipver + # Cache the response across test cases + if hasattr(cfg, attr): + return getattr(cfg, attr) + + route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}", + json=True, host=cfg.remote)[0] + gw = route.get("gateway") + # Local L2 segment, address directly + if not gw: + setattr(cfg, attr, cfg.dev['address']) + return getattr(cfg, attr) + + # ping to make sure neighbor is resolved, + # bind to an interface, for v6 the GW is likely link local + cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote) + + neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}", + json=True, host=cfg.remote)[0] + setattr(cfg, attr, neigh['lladdr']) + return getattr(cfg, attr) + + +def _write_defer_restore(cfg, path, val, defer_undo=False): + with open(path, "r", encoding="utf-8") as fp: + orig_val = fp.read().strip() + if str(val) == orig_val: + return + with open(path, "w", encoding="utf-8") as fp: + fp.write(val) + if defer_undo: + defer(_write_defer_restore, cfg, path, orig_val) + + +def _set_mtu_restore(dev, mtu, host): + if dev['mtu'] < mtu: + ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host) + defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host) + + +def _setup(cfg, test_name): + """ Setup hardware loopback mode for GRO testing. """ + + if not hasattr(cfg, "bin_remote"): + cfg.bin_local = cfg.test_dir / "gro" + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + # "large" test needs at least 4k MTU + if test_name == "large": + _set_mtu_restore(cfg.dev, 4096, None) + _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote) + + flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" + irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" + + _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) + _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + + try: + # Disable TSO for local tests + cfg.require_nsim() # will raise KsftXfailEx if not running on nsim + + cmd(f"ethtool -K {cfg.ifname} gro on tso off") + cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote) + except KsftXfailEx: + pass + +def _gro_variants(): + """Generator that yields all combinations of protocol and test types.""" + + for protocol in ["ipv4", "ipv6", "ipip"]: + for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]: + yield protocol, test_name + + +@ksft_variants(_gro_variants()) +def test(cfg, protocol, test_name): + """Run a single GRO test with retries.""" + + ipver = "6" if protocol[-1] == "6" else "4" + cfg.require_ipver(ipver) + + _setup(cfg, test_name) + + base_cmd_args = [ + f"--{protocol}", + f"--dmac {_resolve_dmac(cfg, ipver)}", + f"--smac {cfg.remote_dev['address']}", + f"--daddr {cfg.addr_v[ipver]}", + f"--saddr {cfg.remote_addr_v[ipver]}", + f"--test {test_name}", + "--verbose" + ] + base_args = " ".join(base_cmd_args) + + # Each test is run 6 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. 
+ max_retries = 6 + for attempt in range(max_retries): + rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}" + tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}" + + fail_now = attempt >= max_retries - 1 + + with bkg(rx_cmd, ksft_ready=True, exit_wait=True, + fail=fail_now) as rx_proc: + cmd(tx_cmd, host=cfg.remote) + + if rx_proc.ret == 0: + return + + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"): + ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment") + return + + ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore index 6942bf575497..46540468a775 100644 --- a/tools/testing/selftests/drivers/net/hw/.gitignore +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only iou-zcrx ncdevmem +toeplitz diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 8133d1a0051c..9c163ba6feee 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -1,10 +1,26 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_GEN_FILES = iou-zcrx +# Check if io_uring supports zero-copy receive +HAS_IOURING_ZCRX := $(shell \ + echo -e '#include <liburing.h>\n' \ + 'void *func = (void *)io_uring_register_ifq;\n' \ + 'int main() {return 0;}' | \ + $(CC) -luring -x c - -o /dev/null 2>&1 && echo y) + +ifeq ($(HAS_IOURING_ZCRX),y) +COND_GEN_FILES += iou-zcrx +else +$(warning excluding iouring tests, liburing not installed or too old) +endif + +TEST_GEN_FILES := \ + $(COND_GEN_FILES) \ +# end of TEST_GEN_FILES TEST_PROGS = \ csum.py \ devlink_port_split.py \ + devlink_rate_tc_bw.py \ devmem.py \ ethtool.sh \ ethtool_extended_state.sh \ @@ -21,6 +37,7 @@ TEST_PROGS = \ rss_ctx.py \ rss_flow_label.py \ rss_input_xfrm.py \ + toeplitz.py \ tso.py \ xsk_reconfig.py \ # @@ -38,7 +55,10 @@ TEST_INCLUDES := \ # # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := ncdevmem +YNL_GEN_FILES := \ + ncdevmem \ + toeplitz \ +# end of YNL_GEN_FILES TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) @@ -54,4 +74,6 @@ include ../../../net/ynl.mk include ../../../net/bpf.mk +ifeq ($(HAS_IOURING_ZCRX),y) $(OUTPUT)/iou-zcrx: LDLIBS += -luring +endif diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py index ead6784d1910..4e4faa9275bb 100755 --- a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py +++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -21,21 +21,21 @@ Test Cases: ---------- 1. 
test_no_tc_mapping_bandwidth: - Verifies that without TC mapping, bandwidth is NOT distributed according to - the configured 80/20 split between TC4 and TC3 - - This test should fail if bandwidth matches the 80/20 split without TC + the configured 20/80 split between TC3 and TC4 + - This test should fail if bandwidth matches the 20/80 split without TC mapping - - Expected: Bandwidth should NOT be distributed as 80/20 + - Expected: Bandwidth should NOT be distributed as 20/80 2. test_tc_mapping_bandwidth: - Configures TC mapping using mqprio qdisc - Verifies that with TC mapping, bandwidth IS distributed according to the - configured 80/20 split between TC3 and TC4 - - Expected: Bandwidth should be distributed as 80/20 + configured 20/80 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 20/80 Bandwidth Distribution: ---------------------- -- TC3 (VLAN 101): Configured for 80% of total bandwidth -- TC4 (VLAN 102): Configured for 20% of total bandwidth +- TC3 (VLAN 101): Configured for 20% of total bandwidth +- TC4 (VLAN 102): Configured for 80% of total bandwidth - Total bandwidth: 1Gbps - Tolerance: +-12% @@ -64,43 +64,40 @@ from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from lib.py import NetDrvEpEnv, DevlinkFamily from lib.py import NlError from lib.py import cmd, defer, ethtool, ip +from lib.py import Iperf3Runner class BandwidthValidator: """ - Validates bandwidth totals and per-TC shares against expected values - with a tolerance. + Validates total bandwidth and individual shares with tolerance + relative to the overall total. """ - def __init__(self): + def __init__(self, shares): self.tolerance_percent = 12 - self.expected_total_gbps = 1.0 - self.total_min_expected = self.min_expected(self.expected_total_gbps) - self.total_max_expected = self.max_expected(self.expected_total_gbps) - self.tc_expected_percent = { - 3: 20.0, - 4: 80.0, - } + self.expected_total = sum(shares.values()) + self.bounds = {} + + for name, exp in shares.items(): + self.bounds[name] = (self.min_expected(exp), self.max_expected(exp)) def min_expected(self, value): """Calculates the minimum acceptable value based on tolerance.""" - return value - (value * self.tolerance_percent / 100) + return value - (self.expected_total * self.tolerance_percent / 100) def max_expected(self, value): """Calculates the maximum acceptable value based on tolerance.""" - return value + (value * self.tolerance_percent / 100) - - def bound(self, expected, value): - """Returns True if value is within expected tolerance.""" - return self.min_expected(expected) <= value <= self.max_expected(expected) + return value + (self.expected_total * self.tolerance_percent / 100) - def tc_bandwidth_bound(self, value, tc_ix): + def bound(self, values): """ - Returns True if the given bandwidth value is within tolerance - for the TC's expected bandwidth. + Return True if all given values fall within tolerance. 
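To make the BandwidthValidator tolerance semantics concrete, a small worked example with illustrative numbers that mirrors the class's arithmetic: the ±12% window is taken relative to the overall expected total, so with shares of 20 and 80 each share is accepted within a ±12 percentage-point band rather than ±12% of its own value.

    shares = {"tc3": 20.0, "tc4": 80.0}
    tolerance_percent = 12
    expected_total = sum(shares.values())               # 100.0
    slack = expected_total * tolerance_percent / 100    # 12.0 points for every share

    bounds = {name: (exp - slack, exp + slack) for name, exp in shares.items()}
    # bounds == {"tc3": (8.0, 32.0), "tc4": (68.0, 92.0)}

    measured = {"tc3": 22.5, "tc4": 77.5}
    within = all(lo <= measured[n] <= hi for n, (lo, hi) in bounds.items())  # True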
""" - expected = self.tc_expected_percent[tc_ix] - return self.bound(expected, value) + for name, value in values.items(): + low, high = self.bounds[name] + if not low <= value <= high: + return False + return True def setup_vf(cfg, set_tc_mapping=True): @@ -116,8 +113,8 @@ def setup_vf(cfg, set_tc_mapping=True): except Exception as exc: raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc try: - cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") - defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True) except Exception as exc: raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc @@ -139,8 +136,8 @@ def setup_vlans_on_vf(vf_ifc): Sets up two VLAN interfaces on the given VF, each mapped to a different TC. """ vlan_configs = [ - {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"}, - {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"}, + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"}, ] for config in vlan_configs: @@ -224,13 +221,13 @@ def setup_devlink_rate(cfg): raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc -def setup_remote_server(cfg): +def setup_remote_vlans(cfg): """ - Sets up VLAN interfaces and starts iperf3 servers on the remote side. + Sets up VLAN interfaces on the remote side. """ remote_dev = cfg.remote_ifname vlan_ids = [101, 102] - remote_ips = ["198.51.100.1", "198.51.100.9"] + remote_ips = ["198.51.100.2", "198.51.100.10"] for vlan_id, ip_addr in zip(vlan_ids, remote_ips): vlan_dev = f"{remote_dev}.{vlan_id}" @@ -238,14 +235,13 @@ def setup_remote_server(cfg): f"type vlan id {vlan_id}", host=cfg.remote) cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) - cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote) defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) def setup_test_environment(cfg, set_tc_mapping=True): """ Sets up the complete test environment including VF creation, VLANs, - bridge configuration, devlink rate setup, and the remote server. + bridge configuration and devlink rate setup. """ vf_ifc = setup_vf(cfg, set_tc_mapping) ksft_pr(f"Created VF interface: {vf_ifc}") @@ -256,51 +252,39 @@ def setup_test_environment(cfg, set_tc_mapping=True): setup_bridge(cfg) setup_devlink_rate(cfg) - setup_remote_server(cfg) - time.sleep(2) + setup_remote_vlans(cfg) -def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1): +def measure_bandwidth(cfg, server_ip, client_ip, barrier): """ - Runs a single iperf3 client instance, binding to the given local IP. - Waits on a barrier to synchronize with other threads. + Synchronizes with peers and runs an iperf3-based bandwidth measurement + between the given endpoints. Returns average Gbps. 
""" + runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip) try: barrier.wait(timeout=10) except Exception as exc: raise KsftFailEx("iperf3 barrier wait timed") from exc - iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"] - result = subprocess.run(iperf_cmd, capture_output=True, text=True, - check=True) - try: - output = json.loads(result.stdout) - bits_per_second = output["end"]["sum_received"]["bits_per_second"] - gbps = bits_per_second / 1e9 - if gbps < min_expected_gbps: - ksft_pr( - f"iperf3 bandwidth too low: {gbps:.2f} Gbps " - f"(expected ≥ {min_expected_gbps} Gbps)" - ) - return None - return gbps - except json.JSONDecodeError as exc: - ksft_pr(f"Failed to parse iperf3 JSON output: {exc}") - return None + bw_gbps = runner.measure_bandwidth(reverse=True) + except Exception as exc: + raise KsftFailEx("iperf3 bandwidth measurement failed") from exc + + return bw_gbps -def run_bandwidth_test(): +def run_bandwidth_test(cfg): """ - Launches iperf3 client threads for each VLAN/TC pair and collects results. + Runs parallel bandwidth measurements for each VLAN/TC pair and collects results. """ - def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix): - results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier) + def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix): + results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier) vf_vlan_data = [ # (local_ip, remote_ip, TC) - ("198.51.100.2", "198.51.100.1", 3), - ("198.51.100.10", "198.51.100.9", 4), + ("198.51.100.1", "198.51.100.2", 3), + ("198.51.100.9", "198.51.100.10", 4), ] results = {} @@ -309,8 +293,8 @@ def run_bandwidth_test(): for local_ip, remote_ip, tc_ix in vf_vlan_data: thread = threading.Thread( - target=_run_iperf_client_thread, - args=(remote_ip, local_ip, results, start_barrier, tc_ix) + target=_run_measure_bandwidth_thread, + args=(local_ip, remote_ip, results, start_barrier, tc_ix) ) thread.start() threads.append(thread) @@ -320,10 +304,11 @@ def run_bandwidth_test(): for tc_ix, tc_bw in results.items(): if tc_bw is None: - raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth") + raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth") return results + def calculate_bandwidth_percentages(results): """ Calculates the percentage of total bandwidth received by TC3 and TC4. @@ -364,59 +349,48 @@ def verify_total_bandwidth(bw_data, validator): """ total = bw_data['total_bw'] - if validator.bound(validator.expected_total_gbps, total): + if validator.bound({"total": total}): return - if total < validator.total_min_expected: + low, high = validator.bounds["total"] + + if total < low: raise KsftSkipEx( f"Total bandwidth {total:.2f} Gbps < minimum " - f"{validator.total_min_expected:.2f} Gbps; " - f"parent tx_max ({validator.expected_total_gbps:.1f} G) " + f"{low:.2f} Gbps; " + f"parent tx_max ({validator.expected_total:.1f} G) " f"not reached, cannot validate share" ) raise KsftFailEx( f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " - f"{validator.total_max_expected:.2f} Gbps " - f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)" + f"{high:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total:.1f} G)" ) -def check_bandwidth_distribution(bw_data, validator): - """ - Checks whether the measured TC3 and TC4 bandwidth percentages - fall within their expected tolerance ranges. - - Returns: - bool: True if both TC3 and TC4 percentages are within bounds. 
- """ - tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3) - tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4) - - return tc3_valid and tc4_valid - - def run_bandwidth_distribution_test(cfg, set_tc_mapping): """ - Runs parallel iperf3 tests for both TCs and collects results. + Runs parallel bandwidth measurements for both TCs and collects results. """ setup_test_environment(cfg, set_tc_mapping) - bandwidths = run_bandwidth_test() + bandwidths = run_bandwidth_test(cfg) bw_data = calculate_bandwidth_percentages(bandwidths) test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" print_bandwidth_results(bw_data, test_name) - verify_total_bandwidth(bw_data, cfg.bw_validator) + verify_total_bandwidth(bw_data, cfg.traffic_bw_validator) - return check_bandwidth_distribution(bw_data, cfg.bw_validator) + return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'], + "tc4": bw_data['tc4_percentage']}) def test_no_tc_mapping_bandwidth(cfg): """ - Verifies that bandwidth is not split 80/20 without traffic class mapping. + Verifies that bandwidth is not split 20/80 without traffic class mapping. """ - pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping" - fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping" + pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping" + fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping" is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): @@ -430,13 +404,13 @@ def test_no_tc_mapping_bandwidth(cfg): def test_tc_mapping_bandwidth(cfg): """ - Verifies that bandwidth is correctly split 80/20 between TC3 and TC4 + Verifies that bandwidth is correctly split 20/80 between TC3 and TC4 when traffic class mapping is set. 
""" if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): - ksft_pr("Bandwidth is distributed as 80/20 with TC mapping") + ksft_pr("Bandwidth is distributed as 20/80 with TC mapping") else: - raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping") + raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping") def main() -> None: @@ -451,9 +425,9 @@ def main() -> None: ) if not cfg.pci: raise KsftSkipEx("Could not get PCI address of the interface") - cfg.require_cmd("iperf3", local=True, remote=True) - cfg.bw_validator = BandwidthValidator() + cfg.traffic_bw_validator = BandwidthValidator({"total": 1}) + cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80}) cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index fb010a48a5a1..766bfc4ad842 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -25,10 +25,10 @@ try: fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ - ksft_setup + ksft_setup, ksft_variants, KsftNamedVariant from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none - from drivers.net.lib.py import GenerateTraffic, Remote + from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", @@ -40,11 +40,12 @@ try: "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", - "ksft_setup", + "ksft_setup", "ksft_variants", "KsftNamedVariant", "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none", - "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"] + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c index 9ba03164d73a..a4d04438c313 100644 --- a/tools/testing/selftests/net/toeplitz.c +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -52,7 +52,11 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h" +#include <ynl.h> +#include "ethtool-user.h" + +#include "../../../kselftest.h" +#include "../../../net/lib/ksft.h" #define TOEPLITZ_KEY_MIN_LEN 40 #define TOEPLITZ_KEY_MAX_LEN 60 @@ -64,6 +68,7 @@ #define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) #define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ +#define RSS_MAX_INDIR (1 << 16) #define RPS_MAX_CPUS 16UL /* must be a power of 2 */ @@ -101,6 +106,8 @@ struct ring_state { static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ static int rps_silo_to_cpu[RPS_MAX_CPUS]; static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static unsigned int rss_indir_tbl[RSS_MAX_INDIR]; +static unsigned int rss_indir_tbl_size; static struct ring_state rings[RSS_MAX_CPUS]; static inline uint32_t toeplitz(const unsigned char *four_tuple, @@ -129,7 +136,12 
@@ static inline uint32_t toeplitz(const unsigned char *four_tuple, /* Compare computed cpu with arrival cpu from packet_fanout_cpu */ static void verify_rss(uint32_t rx_hash, int cpu) { - int queue = rx_hash % cfg_num_queues; + int queue; + + if (rss_indir_tbl_size) + queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size]; + else + queue = rx_hash % cfg_num_queues; log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); if (rx_irq_cpus[queue] != cpu) { @@ -482,6 +494,56 @@ static void parse_rps_bitmap(const char *arg) rps_silo_to_cpu[cfg_num_rps_cpus++] = i; } +static void read_rss_dev_info_ynl(void) +{ + struct ethtool_rss_get_req *req; + struct ethtool_rss_get_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, NULL); + if (!ys) + error(1, errno, "ynl_sock_create failed"); + + req = ethtool_rss_get_req_alloc(); + if (!req) + error(1, errno, "ethtool_rss_get_req_alloc failed"); + + ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname); + + rsp = ethtool_rss_get(ys, req); + if (!rsp) + error(1, ys->err.code, "YNL: %s", ys->err.msg); + + if (!rsp->_len.hkey) + error(1, 0, "RSS key not available for %s", cfg_ifname); + + if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN || + rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN) + error(1, 0, "RSS key length %u out of bounds [%u, %u]", + rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN, + TOEPLITZ_KEY_MAX_LEN); + + memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey); + + if (rsp->_count.indir > RSS_MAX_INDIR) + error(1, 0, "RSS indirection table too large (%u > %u)", + rsp->_count.indir, RSS_MAX_INDIR); + + /* If indir table not available we'll fallback to simple modulo math */ + if (rsp->_count.indir) { + memcpy(rss_indir_tbl, rsp->indir, + rsp->_count.indir * sizeof(rss_indir_tbl[0])); + rss_indir_tbl_size = rsp->_count.indir; + + log_verbose("RSS indirection table size: %u\n", + rss_indir_tbl_size); + } + + ethtool_rss_get_rsp_free(rsp); + ethtool_rss_get_req_free(req); + ynl_sock_destroy(ys); +} + static void parse_opts(int argc, char **argv) { static struct option long_options[] = { @@ -550,7 +612,7 @@ static void parse_opts(int argc, char **argv) } if (!have_toeplitz) - error(1, 0, "Must supply rss key ('-k')"); + read_rss_dev_info_ynl(); num_cpus = get_nprocs(); if (num_cpus > RSS_MAX_CPUS) @@ -576,6 +638,10 @@ int main(int argc, char **argv) fd_sink = setup_sink(); setup_rings(); + + /* Signal to test framework that we're ready to receive */ + ksft_ready(); + process_rings(); cleanup_rings(); diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py new file mode 100755 index 000000000000..d2db5ee9e358 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Toeplitz Rx hashing test: + - rxhash (the hash value calculation itself); + - RSS mapping from rxhash to rx queue; + - RPS mapping from rxhash to cpu. 
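For readers who want to see the hash itself, here is a compact reference implementation of the Toeplitz hash and of the queue lookup performed by verify_rss() above, written in Python rather than the test's C. It is a sketch under the usual RSS conventions (input is src addr, dst addr, src port, dst port in network byte order; the key must be at least 4 bytes longer than the input), not a drop-in replacement for the selftest code.

    def toeplitz_hash(key: bytes, data: bytes) -> int:
        # Classic Toeplitz/RSS hash: for every set bit of the input (MSB
        # first), XOR in the 32-bit window of the key starting at that bit.
        assert len(key) >= len(data) + 4, "key must be >= input + 4 bytes"
        key_int = int.from_bytes(key, "big")
        key_bits = len(key) * 8
        result = 0
        for i in range(len(data) * 8):
            byte, bit = divmod(i, 8)
            if data[byte] & (0x80 >> bit):
                result ^= (key_int >> (key_bits - 32 - i)) & 0xFFFFFFFF
        return result

    def rx_queue(rx_hash: int, indir_tbl: list, num_queues: int) -> int:
        # With an RSS indirection table the hash indexes the table, otherwise
        # fall back to plain modulo -- the same choice verify_rss() makes.
        if indir_tbl:
            return indir_tbl[rx_hash % len(indir_tbl)]
        return rx_hash % num_queues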
+""" + +import glob +import os +import socket +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily +from lib.py import cmd, bkg, rand_port, defer +from lib.py import ksft_in +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx + +# "define" for the ID of the Toeplitz hash function +ETH_RSS_HASH_TOP = 1 + + +def _check_rps_and_rfs_not_configured(cfg): + """Verify that RPS is not already configured.""" + + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if set(val) - {"0", ","}: + raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}") + + rfs_file = "/proc/sys/net/core/rps_sock_flow_entries" + with open(rfs_file, "r", encoding="utf-8") as fp: + val = fp.read().strip() + if val != "0": + raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}") + + +def _get_cpu_for_irq(irq): + with open(f"/proc/irq/{irq}/smp_affinity_list", "r", + encoding="utf-8") as fp: + data = fp.read().strip() + if "," in data or "-" in data: + raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}") + return int(data) + + +def _get_irq_cpus(cfg): + """ + Read the list of IRQs for the device Rx queues. + """ + queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True) + napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True) + + # Remap into ID-based dicts + napis = {n["id"]: n for n in napis} + queues = {f"{q['type']}{q['id']}": q for q in queues} + + cpus = [] + for rx in range(9999): + name = f"rx{rx}" + if name not in queues: + break + cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"])) + + return cpus + + +def _get_unused_cpus(cfg, count=2): + """ + Get CPUs that are not used by Rx queues. + Returns a list of at least 'count' CPU numbers. 
+ """ + + # Get CPUs used by Rx queues + rx_cpus = set(_get_irq_cpus(cfg)) + + # Get total number of CPUs + num_cpus = os.cpu_count() + + # Find unused CPUs + unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus] + + if len(unused_cpus) < count: + raise KsftSkipEx(f"Need at {count} CPUs not used by Rx queues, found {len(unused_cpus)}") + + return unused_cpus[:count] + + +def _configure_rps(cfg, rps_cpus): + """Configure RPS for all Rx queues.""" + + mask = 0 + for cpu in rps_cpus: + mask |= (1 << cpu) + mask = hex(mask)[2:] + + # Set RPS bitmap for all rx queues + for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"): + with open(rps_file, "w", encoding="utf-8") as fp: + fp.write(mask) + + return mask + + +def _send_traffic(cfg, proto_flag, ipver, port): + """Send 20 packets of requested type.""" + + # Determine protocol and IP version for socat + if proto_flag == "-u": + proto = "UDP" + else: + proto = "TCP" + + baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"] + + # Run socat in a loop to send traffic periodically + # Use sh -c with a loop similar to toeplitz_client.sh + socat_cmd = f""" + for i in `seq 20`; do + echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port}; + sleep 0.001; + done + """ + + cmd(socat_cmd, shell=True, host=cfg.remote) + + +def _test_variants(): + for grp in ["", "rss", "rps"]: + for l4 in ["tcp", "udp"]: + for l3 in ["4", "6"]: + name = f"{l4}_ipv{l3}" + if grp: + name = f"{grp}_{name}" + yield KsftNamedVariant(name, "-" + l4[0], l3, grp) + + +@ksft_variants(_test_variants()) +def test(cfg, proto_flag, ipver, grp): + """Run a single toeplitz test.""" + + cfg.require_ipver(ipver) + + # Check that rxhash is enabled + ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout) + + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # Make sure NIC is configured to use Toeplitz hash, and no key xfrm. 
+ if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hfunc": ETH_RSS_HASH_TOP, + "input-xfrm": {}}) + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex}, + "hfunc": rss.get('hfunc'), + "input-xfrm": rss.get('input-xfrm', {}) + }) + + port = rand_port(socket.SOCK_DGRAM) + + toeplitz_path = cfg.test_dir / "toeplitz" + rx_cmd = [ + str(toeplitz_path), + "-" + ipver, + proto_flag, + "-d", str(port), + "-i", cfg.ifname, + "-T", "4000", + "-s", + "-v" + ] + + if grp: + _check_rps_and_rfs_not_configured(cfg) + if grp == "rss": + irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)]) + rx_cmd += ["-C", irq_cpus] + ksft_pr(f"RSS using CPUs: {irq_cpus}") + elif grp == "rps": + # Get CPUs not used by Rx queues and configure them for RPS + rps_cpus = _get_unused_cpus(cfg, count=2) + rps_mask = _configure_rps(cfg, rps_cpus) + defer(_configure_rps, cfg, []) + rx_cmd += ["-r", rps_mask] + ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}") + + # Run rx in background, it will exit once it has seen enough packets + with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc: + while rx_proc.proc.poll() is None: + _send_traffic(cfg, proto_flag, ipver, port) + + # Check rx result + ksft_pr("Receiver output:") + ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) + if rx_proc.stderr: + ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + + +def main() -> None: + """Ksft boilerplate main.""" + + with NetDrvEpEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() + ksft_run(cases=[test], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index b0c6300150fb..8b75faa9af6d 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -25,7 +25,7 @@ try: fd_read_timeout, ip, rand_port, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ - ksft_setup + ksft_setup, ksft_variants, KsftNamedVariant from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none @@ -38,16 +38,17 @@ try: "wait_port_listen", "wait_file", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", - "ksft_setup", + "ksft_setup", "ksft_variants", "KsftNamedVariant", "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", "ksft_not_none", "ksft_not_none"] from .env import NetDrvEnv, NetDrvEpEnv - from .load import GenerateTraffic + from .load import GenerateTraffic, Iperf3Runner from .remote import Remote - __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"] + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", + "Iperf3Runner"] except ModuleNotFoundError as e: print("Failed importing `net` library from kernel sources") print(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 01be3d9b9720..8b644fd84ff2 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -168,6 +168,8 @@ class NetDrvEpEnv(NetDrvEnvBase): # resolve remote interface name 
self.remote_ifname = self.resolve_remote_ifc() + self.remote_dev = ip("-d link show dev " + self.remote_ifname, + host=self.remote, json=True)[0] self._required_cmd = {} diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index c4e808407cc4..f181fa2d38fc 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -2,21 +2,89 @@ import re import time +import json from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen -class GenerateTraffic: - def __init__(self, env, port=None): - env.require_cmd("iperf3", local=True, remote=True) +class Iperf3Runner: + """ + Sets up and runs iperf3 traffic. + """ + def __init__(self, env, port=None, server_ip=None, client_ip=None): + env.require_cmd("iperf3", local=True, remote=True) self.env = env - self.port = rand_port() if port is None else port - self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + self.server_ip = server_ip + self.client_ip = client_ip + + def _build_server(self): + cmdline = f"iperf3 -s -1 -p {self.port}" + if self.server_ip: + cmdline += f" -B {self.server_ip}" + return cmdline + + def _build_client(self, streams, duration, reverse): + host = self.env.addr if self.server_ip is None else self.server_ip + cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J" + if self.client_ip: + cmdline += f" -B {self.client_ip}" + if reverse: + cmdline += " --reverse" + return cmdline + + def start_server(self): + """ + Starts an iperf3 server with optional bind IP. + """ + cmdline = self._build_server() + proc = cmd(cmdline, background=True) wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", - background=True, host=env.remote) + return proc + + def start_client(self, background=False, streams=1, duration=10, reverse=False): + """ + Starts the iperf3 client with the configured options. + """ + cmdline = self._build_client(streams, duration, reverse) + return cmd(cmdline, background=background, host=self.env.remote) + + def measure_bandwidth(self, reverse=False): + """ + Runs an iperf3 measurement and returns the average bandwidth (Gbps). + Discards the first and last few reporting intervals and uses only the + middle part of the run where throughput is typically stable. + """ + self.start_server() + result = self.start_client(duration=10, reverse=reverse) + + if result.ret != 0: + raise RuntimeError("iperf3 failed to run successfully") + try: + out = json.loads(result.stdout) + except json.JSONDecodeError as exc: + raise ValueError("Failed to parse iperf3 JSON output") from exc + + intervals = out.get("intervals", []) + samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals] + if len(samples) < 10: + raise ValueError(f"iperf3 returned too few intervals: {len(samples)}") + # Discard potentially unstable first and last 3 seconds. 
+ stable = samples[3:-3] + + avg = sum(stable) / len(stable) + + return avg + + +class GenerateTraffic: + def __init__(self, env, port=None): + self.env = env + self.runner = Iperf3Runner(env, port) + + self._iperf_server = self.runner.start_server() + self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400) # Wait for traffic to ramp up if not self._wait_pkts(pps=1000): @@ -61,7 +129,7 @@ class GenerateTraffic: def _wait_client_stopped(self, sleep=0.005, timeout=5): end = time.monotonic() + timeout - live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ") while time.monotonic() < end: data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 87f89fd92f8c..ae8abff4be40 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -249,7 +249,7 @@ function listen_port_and_save_to() { # Just wait for 2 seconds timeout 2 ip netns exec "${NAMESPACE}" \ - socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null } # Only validate that the message arrived properly diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index a3446b569976..2022f3061738 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -28,8 +28,6 @@ OUTPUT_FILE="/tmp/${TARGET}" # Check for basic system dependency and exit if not found check_for_dependencies -# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) -echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT @@ -39,6 +37,9 @@ do for IP_VERSION in "ipv6" "ipv4" do echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Set current loglevel to KERN_INFO(6), and default to + # KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk # Create one namespace and two interfaces set_network "${IP_VERSION}" # Create a dynamic target for netconsole diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh index 29bad56448a2..06089643b771 100755 --- a/tools/testing/selftests/drivers/net/netcons_overflow.sh +++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh @@ -15,7 +15,7 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh # This is coming from netconsole code. 
Check for it in drivers/net/netconsole.c -MAX_USERDATA_ITEMS=16 +MAX_USERDATA_ITEMS=256 # Function to create userdata entries function create_userdata_max_entries() { diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index df10c7243511..1a228c5430f5 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -8,7 +8,6 @@ TEST_PROGS := \ ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ - ethtool-ring.sh \ fib.sh \ fib_notifications.sh \ hw_stats_l3.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 030762b203d7..1b529ccaf050 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,7 +3,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test params_test regions_test reload_test \ +ALL_TESTS="fw_flash_test params_test \ + params_default_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 @@ -78,17 +79,28 @@ fw_flash_test() param_get() { local name=$1 + local attr=${2:-value} + local cmode=${3:-driverinit} cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \ - '.[][][].values[] | select(.cmode == "driverinit").value' + '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr" } param_set() { local name=$1 local value=$2 + local cmode=${3:-driverinit} - devlink dev param set $DL_HANDLE name $name cmode driverinit value $value + devlink dev param set $DL_HANDLE name $name cmode $cmode value $value +} + +param_set_default() +{ + local name=$1 + local cmode=${2:-driverinit} + + devlink dev param set $DL_HANDLE name $name default cmode $cmode } check_value() @@ -97,12 +109,18 @@ check_value() local phase_name=$2 local expected_param_value=$3 local expected_debugfs_value=$4 + local cmode=${5:-driverinit} local value + local attr="value" - value=$(param_get $name) - check_err $? "Failed to get $name param value" + if [[ "$phase_name" == *"default"* ]]; then + attr="default" + fi + + value=$(param_get $name $attr $cmode) + check_err $? "Failed to get $name param $attr" [ "$value" == "$expected_param_value" ] - check_err $? "Unexpected $phase_name $name param value" + check_err $? "Unexpected $phase_name $name param $attr" value=$(<$DEBUGFS_DIR/$name) check_err $? "Failed to get $name debugfs value" [ "$value" == "$expected_debugfs_value" ] @@ -135,6 +153,92 @@ params_test() log_test "params test" } +value_to_debugfs() +{ + local value=$1 + + case "$value" in + true) + echo "Y" + ;; + false) + echo "N" + ;; + *) + echo "$value" + ;; + esac +} + +test_default() +{ + local param_name=$1 + local new_value=$2 + local expected_default=$3 + local cmode=${4:-driverinit} + local default_debugfs + local new_debugfs + local expected_debugfs + + default_debugfs=$(value_to_debugfs $expected_default) + new_debugfs=$(value_to_debugfs $new_value) + + expected_debugfs=$default_debugfs + check_value $param_name initial-default $expected_default $expected_debugfs $cmode + + param_set $param_name $new_value $cmode + check_err $? 
"Failed to set $param_name to $new_value" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs") + check_value $param_name post-set $new_value $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$new_debugfs + check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode + + param_set_default $param_name $cmode + check_err $? "Failed to set $param_name to default" + + expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs") + check_value $param_name post-set-default $expected_default $expected_debugfs $cmode + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device" + + expected_debugfs=$default_debugfs + check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode +} + +params_default_test() +{ + RET=0 + + if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then + echo "SKIP: devlink cli missing default feature" + return + fi + + # Remove side effects of previous tests. Use plain param_set, because + # param_set_default is a feature under test here. + param_set max_macs 32 driverinit + check_err $? "Failed to reset max_macs to default value" + param_set test1 true driverinit + check_err $? "Failed to reset test1 to default value" + param_set test2 1234 runtime + check_err $? "Failed to reset test2 to default value" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload device for clean state" + + test_default max_macs 16 32 driverinit + test_default test1 false true driverinit + test_default test2 100 1234 runtime + + log_test "params default test" +} + check_region_size() { local name=$1 diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh deleted file mode 100755 index c969559ffa7a..000000000000 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0-only - -source ethtool-common.sh - -function get_value { - local query="${SETTINGS_MAP[$1]}" - - echo $(ethtool -g $NSIM_NETDEV | \ - tail -n +$CURR_SETT_LINE | \ - awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}') -} - -function update_current_settings { - for key in ${!SETTINGS_MAP[@]}; do - CURRENT_SETTINGS[$key]=$(get_value $key) - done - echo ${CURRENT_SETTINGS[@]} -} - -if ! 
ethtool -h | grep -q set-ring >/dev/null; then - echo "SKIP: No --set-ring support in ethtool" - exit 4 -fi - -NSIM_NETDEV=$(make_netdev) - -set -o pipefail - -declare -A SETTINGS_MAP=( - ["rx"]="RX" - ["rx-mini"]="RX Mini" - ["rx-jumbo"]="RX Jumbo" - ["tx"]="TX" -) - -declare -A EXPECTED_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -declare -A CURRENT_SETTINGS=( - ["rx"]="" - ["rx-mini"]="" - ["rx-jumbo"]="" - ["tx"]="" -) - -MAX_VALUE=$((RANDOM % $((2**32-1)))) -RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/) - -for ring_max_entry in $RING_MAX_LIST; do - echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry -done - -CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:) - -# populate the expected settings map -for key in ${!SETTINGS_MAP[@]}; do - EXPECTED_SETTINGS[$key]=$(get_value $key) -done - -# test -for key in ${!SETTINGS_MAP[@]}; do - value=$((RANDOM % $MAX_VALUE)) - - ethtool -G $NSIM_NETDEV "$key" "$value" - - EXPECTED_SETTINGS[$key]="$value" - expected=${EXPECTED_SETTINGS[@]} - current=$(update_current_settings) - - check $? "$current" "$expected" - set +x -done - -if [ $num_errors -eq 0 ]; then - echo "PASSED all $((num_passes)) checks" - exit 0 -else - echo "FAILED $num_errors/$((num_errors+num_passes)) checks" - exit 1 -fi diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py index 4ae7a785ff10..06559ef49b9a 100755 --- a/tools/testing/selftests/drivers/net/psp.py +++ b/tools/testing/selftests/drivers/net/psp.py @@ -109,6 +109,10 @@ def _check_data_outq(s, exp_len, force_wait=False): time.sleep(0.01) ksft_eq(outq, exp_len) + +def _get_stat(cfg, key): + return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key] + # # Test case boiler plate # @@ -171,11 +175,16 @@ def dev_rotate(cfg): """ Test key rotation """ _init_psp_dev(cfg) + prev_rotations = _get_stat(cfg, 'key-rotations') + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) ksft_eq(rot['id'], cfg.psp_dev_id) rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) ksft_eq(rot['id'], cfg.psp_dev_id) + cur_rotations = _get_stat(cfg, 'key-rotations') + ksft_eq(cur_rotations, prev_rotations + 2) + def dev_rotate_spi(cfg): """ Test key rotation and SPI check """ @@ -475,6 +484,7 @@ def data_stale_key(cfg): """ Test send on a double-rotated key """ _init_psp_dev(cfg) + prev_stale = _get_stat(cfg, 'stale-events') s = _make_psp_conn(cfg) try: rx_assoc = cfg.pspnl.rx_assoc({"version": 0, @@ -495,6 +505,9 @@ def data_stale_key(cfg): cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cur_stale = _get_stat(cfg, 'stale-events') + ksft_gt(cur_stale, prev_stale) + s.send(b'0123456789' * 200) _check_data_outq(s, 2000, force_wait=True) finally: diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py new file mode 100755 index 000000000000..f9530a8b0856 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ring_reconfig.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Test channel and ring size configuration via ethtool (-L / -G). +""" + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic +from lib.py import defer, NlError + + +def channels(cfg) -> None: + """ + Twiddle channel counts in various combinations of parameters. 
+ We're only checking that the driver adheres to the requested config + when the config is accepted, and that nothing crashes. + """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx", "tx", "combined"] + mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"}, + {"rx", "tx", "combined"},] + + # Get the set of keys that device actually supports + restore = {} + supported = set() + for key in all_keys: + if key + "-max" in chans: + supported.add(key) + restore |= {key + "-count": chans[key + "-count"]} + + defer(cfg.eth.channels_set, ehdr | restore) + + def test_config(config): + try: + cfg.eth.channels_set(ehdr | config) + get = cfg.eth.channels_get(ehdr) + for k, v in config.items(): + ksft_eq(get.get(k, 0), v) + except NlError as e: + failed.append(mix) + ksft_pr("Can't set", config, e) + else: + ksft_pr("Okay", config) + + failed = [] + for mix in mixes: + if not mix.issubset(supported): + continue + + # Set all the values in the mix to 1, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = 1 if key in mix else 0 + test_config(config) + + for mix in mixes: + if not mix.issubset(supported): + continue + if mix in failed: + continue + + # Set all the values in the mix to max, other supported to 0 + config = {} + for key in all_keys: + config[key + "-count"] = chans[key + '-max'] if key in mix else 0 + test_config(config) + + +def _configure_min_ring_cnt(cfg) -> None: + """ Try to configure a single Rx/Tx ring. """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + chans = cfg.eth.channels_get(ehdr) + + all_keys = ["rx-count", "tx-count", "combined-count"] + restore = {} + config = {} + for key in all_keys: + if key in chans: + restore[key] = chans[key] + config[key] = 0 + + if chans.get('combined-count', 0) > 1: + config['combined-count'] = 1 + elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1: + config['tx-count'] = 1 + config['rx-count'] = 1 + else: + # looks like we're already on 1 channel + return + + cfg.eth.channels_set(ehdr | config) + defer(cfg.eth.channels_set, ehdr | restore) + + +def ringparam(cfg) -> None: + """ + Tweak the ringparam configuration. Try to run some traffic over min + ring size to make sure it actually functions. 
+ """ + ehdr = {'header':{'dev-index': cfg.ifindex}} + rings = cfg.eth.rings_get(ehdr) + + restore = {} + maxes = {} + params = set() + for key in rings.keys(): + if 'max' in key: + param = key[:-4] + maxes[param] = rings[key] + params.add(param) + restore[param] = rings[param] + + defer(cfg.eth.rings_set, ehdr | restore) + + # Speed up the reconfig by configuring just one ring + _configure_min_ring_cnt(cfg) + + # Try to reach min on all settings + for param in params: + val = rings[param] + while True: + try: + cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex}, + param: val // 2}) + if val == 0: + break + val //= 2 + except NlError: + break + + get = cfg.eth.rings_get(ehdr) + ksft_eq(get[param], val) + + ksft_pr(f"Reached min for '{param}' at {val} (max {rings[param]})") + + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + # Try max across all params, if the driver supports large rings + # this may OOM so we ignore errors + try: + ksft_pr("Applying max settings") + config = {p: maxes[p] for p in params} + cfg.eth.rings_set(ehdr | config) + except NlError as e: + ksft_pr("Can't set max params", config, e) + else: + GenerateTraffic(cfg).wait_pkts_and_stop(10000) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.eth = EthtoolFamily() + + ksft_run([channels, + ringparam], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 04d0a2a13e73..b08e4d48b15c 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -263,14 +263,15 @@ def procfs_downup_hammer(cfg) -> None: Reading stats via procfs only holds the RCU lock, drivers often try to sleep when reading the stats, or don't protect against races. """ - # Max out the queues, we'll flip between max and 1 + # Set a large number of queues, + # we'll flip between min(max_queues, 64) and 1 channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) if channels['combined-count'] == 0: rx_type = 'rx' else: rx_type = 'combined' cur_queue_cnt = channels[f'{rx_type}-count'] - max_queue_cnt = channels[f'{rx_type}-max'] + max_queue_cnt = min(channels[f'{rx_type}-max'], 64) cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index a148004e1c36..e54df158dfe9 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -12,6 +12,7 @@ from dataclasses import dataclass from enum import Enum from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftNamedVariant, ksft_variants from lib.py import KsftFailEx, NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import bkg, cmd, rand_port, wait_port_listen @@ -672,7 +673,18 @@ def test_xdp_native_adjst_head_shrnk_data(cfg): _validate_res(res, offset_lst, pkt_sz_lst) -def _test_xdp_native_ifc_stats(cfg, act): +@ksft_variants([ + KsftNamedVariant("pass", XDPAction.PASS), + KsftNamedVariant("drop", XDPAction.DROP), + KsftNamedVariant("tx", XDPAction.TX), +]) +def test_xdp_native_qstats(cfg, act): + """ + Send 1000 messages. Expect XDP action specified in @act. + Make sure the packets were counted to interface level qstats + (Rx, and Tx if act is TX). 
+ """ + cfg.require_cmd("socat") bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) @@ -687,9 +699,12 @@ def _test_xdp_native_ifc_stats(cfg, act): "/dev/null" # Listener runs on "remote" in case of XDP_TX rx_host = cfg.remote if act == XDPAction.TX else None - # We want to spew 2000 packets quickly, bash seems to do a good enough job - tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ - "for i in `seq 2000`; do echo a >&5; done; exec 5>&-" + # We want to spew 1000 packets quickly, bash seems to do a good enough job + # Each reopening of the socket gives us a different local port (for RSS) + tx_udp = "for _ in `seq 20`; do " \ + f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 50`; do echo a >&5; done; " \ + "exec 5>&-; done" cfg.wait_hw_stats_settle() # Qstats have more clearly defined semantics than rtnetlink. @@ -704,11 +719,11 @@ def _test_xdp_native_ifc_stats(cfg, act): cfg.wait_hw_stats_settle() after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - ksft_ge(after['rx-packets'] - before['rx-packets'], 2000) + expected_pkts = 1000 + ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts) if act == XDPAction.TX: - ksft_ge(after['tx-packets'] - before['tx-packets'], 2000) + ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts) - expected_pkts = 2000 stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") if act == XDPAction.TX: @@ -730,30 +745,6 @@ def _test_xdp_native_ifc_stats(cfg, act): ksft_ge(after['tx-packets'], before['tx-packets']) -def test_xdp_native_qstats_pass(cfg): - """ - Send 2000 messages, expect XDP_PASS, make sure the packets were counted - to interface level qstats (Rx). - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.PASS) - - -def test_xdp_native_qstats_drop(cfg): - """ - Send 2000 messages, expect XDP_DROP, make sure the packets were counted - to interface level qstats (Rx). - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.DROP) - - -def test_xdp_native_qstats_tx(cfg): - """ - Send 2000 messages, expect XDP_TX, make sure the packets were counted - to interface level qstats (Rx and Tx) - """ - _test_xdp_native_ifc_stats(cfg, XDPAction.TX) - - def main(): """ Main function to execute the XDP tests. 
@@ -778,9 +769,7 @@ def main(): test_xdp_native_adjst_tail_shrnk_data, test_xdp_native_adjst_head_grow_data, test_xdp_native_adjst_head_shrnk_data, - test_xdp_native_qstats_pass, - test_xdp_native_qstats_drop, - test_xdp_native_qstats_tx, + test_xdp_native_qstats, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 8f9850a71f54..6930fe926c58 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -4,10 +4,8 @@ bind_timewait bind_wildcard busy_poller cmsg_sender -diag_uid epoll_busy_poll fin_ack_lat -gro hwtstamp_config io_uring_zerocopy_tx ioam6_parser @@ -18,7 +16,6 @@ ipv6_flowlabel ipv6_flowlabel_mgr ipv6_fragmentation log.txt -msg_oob msg_zerocopy netlink-dumps nettest @@ -35,9 +32,6 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello -scm_inq -scm_pidfd -scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off @@ -45,7 +39,6 @@ skf_net_off socket so_incoming_cpu so_netns_cookie -so_peek_off so_txtime so_rcv_listener stress_reuseport_listen @@ -57,7 +50,6 @@ tcp_port_share tfo timestamping tls -toeplitz tools tun txring_overwrite @@ -65,4 +57,3 @@ txtimestamp udpgso udpgso_bench_rx udpgso_bench_tx -unix_connect diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index b5127e968108..b66ba04f19d9 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,7 +38,6 @@ TEST_PROGS := \ fq_band_pktlimit.sh \ gre_gso.sh \ gre_ipv6_lladdr.sh \ - gro.sh \ icmp.sh \ icmp_redirect.sh \ io_uring_zerocopy_tx.sh \ @@ -121,8 +120,6 @@ TEST_PROGS := \ # end of TEST_PROGS TEST_PROGS_EXTENDED := \ - toeplitz.sh \ - toeplitz_client.sh \ xfrm_policy_add_speed.sh \ # end of TEST_PROGS_EXTENDED @@ -130,7 +127,6 @@ TEST_GEN_FILES := \ bind_bhash \ cmsg_sender \ fin_ack_lat \ - gro \ hwtstamp_config \ io_uring_zerocopy_tx \ ioam6_parser \ @@ -159,7 +155,6 @@ TEST_GEN_FILES := \ tcp_mmap \ tfo \ timestamping \ - toeplitz \ txring_overwrite \ txtimestamp \ udpgso \ @@ -193,8 +188,6 @@ TEST_FILES := \ in_netns.sh \ lib.sh \ settings \ - setup_loopback.sh \ - setup_veth.sh \ # end of TEST_FILES # YNL files, must be before "include ..lib.mk" diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore new file mode 100644 index 000000000000..240b26740c9e --- /dev/null +++ b/tools/testing/selftests/net/af_unix/.gitignore @@ -0,0 +1,8 @@ +diag_uid +msg_oob +scm_inq +scm_pidfd +scm_rights +so_peek_off +unix_connect +unix_connreset diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 528d14c598bb..3cd677b72072 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -8,6 +8,7 @@ TEST_GEN_PROGS := \ scm_rights \ so_peek_off \ unix_connect \ + unix_connreset \ # end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c index 1a77728128e5..86e7b0fb522d 100644 --- a/tools/testing/selftests/net/af_unix/so_peek_off.c +++ b/tools/testing/selftests/net/af_unix/so_peek_off.c @@ -36,8 +36,8 @@ FIXTURE_VARIANT_ADD(so_peek_off, seqpacket) FIXTURE_SETUP(so_peek_off) { struct timeval timeout = { - .tv_sec = 0, - .tv_usec = 3000, + .tv_sec = 5, + .tv_usec = 0, }; int ret; diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c 
b/tools/testing/selftests/net/af_unix/unix_connreset.c new file mode 100644 index 000000000000..08c1de8f5a98 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connreset.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest for AF_UNIX socket close and ECONNRESET behaviour. + * + * This test verifies: + * 1. SOCK_STREAM returns EOF when the peer closes normally. + * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data. + * 3. SOCK_SEQPACKET returns EOF when the peer closes normally. + * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. + * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. + * + * These tests document the intended Linux behaviour. + * + */ + +#define _GNU_SOURCE +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/un.h> +#include "../../kselftest_harness.h" + +#define SOCK_PATH "/tmp/af_unix_connreset.sock" + +static void remove_socket_file(void) +{ + unlink(SOCK_PATH); +} + +FIXTURE(unix_sock) +{ + int server; + int client; + int child; +}; + +FIXTURE_VARIANT(unix_sock) +{ + int socket_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(unix_sock, stream) { + .socket_type = SOCK_STREAM, + .name = "SOCK_STREAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, dgram) { + .socket_type = SOCK_DGRAM, + .name = "SOCK_DGRAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, seqpacket) { + .socket_type = SOCK_SEQPACKET, + .name = "SOCK_SEQPACKET", +}; + +FIXTURE_SETUP(unix_sock) +{ + struct sockaddr_un addr = {}; + int err; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, SOCK_PATH); + remove_socket_file(); + + self->server = socket(AF_UNIX, variant->socket_type, 0); + ASSERT_LT(-1, self->server); + + err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0); + ASSERT_LT(-1, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(unix_sock) +{ + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) + close(self->child); + + close(self->client); + close(self->server); + remove_socket_file(); +} + +/* Test 1: peer closes normally */ +TEST_F(unix_sock, eof) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + close(self->child); + } else { + close(self->server); + } + + n = recv(self->client, buf, sizeof(buf), 0); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(0, n); + } else { + ASSERT_EQ(-1, n); + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 2: peer closes with unread data */ +TEST_F(unix_sock, reset_unread_behavior) +{ + char buf[16] = {}; + ssize_t n; + + /* Send data that will remain unread */ + send(self->client, "hello", 5, 0); + + if (variant->socket_type == SOCK_DGRAM) { + /* No real connection, just close the server */ + close(self->server); + } else { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + /* Peer closes before client reads */ + close(self->child); + } + + n = recv(self->client, buf, sizeof(buf), 0); + ASSERT_EQ(-1, n); + + if 
(variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(ECONNRESET, errno); + } else { + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 3: closing unaccepted (embryo) server socket should reset client. */ +TEST_F(unix_sock, reset_closed_embryo) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_DGRAM) { + snprintf(_metadata->results->reason, + sizeof(_metadata->results->reason), + "Test only applies to SOCK_STREAM and SOCK_SEQPACKET"); + exit(KSFT_XFAIL); + } + + /* Close server without accept()ing */ + close(self->server); + + n = recv(self->client, buf, sizeof(buf), 0); + + ASSERT_EQ(-1, n); + ASSERT_EQ(ECONNRESET, errno); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 92eb880c52f2..00758f00efbf 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -75,7 +75,7 @@ setup_v4() { ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 if [ $? -ne 0 ]; then cleanup_v4 - echo "failed" + echo "failed; is the system using MACAddressPolicy=persistent ?" exit 1 fi diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh index 7d2d40812074..5ec1c85c1623 100755 --- a/tools/testing/selftests/net/busy_poll_test.sh +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100 GRO_FLUSH_TIMEOUT=50000 SUSPEND_TIMEOUT=20000000 +NAPI_THREADED_MODE_BUSY_POLL=2 + setup_ns() { set -e @@ -62,6 +64,9 @@ cleanup_ns() test_busypoll() { suspend_value=${1:-0} + napi_threaded_value=${2:-0} + prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL} + tmp_file=$(mktemp) out_file=$(mktemp) @@ -73,10 +78,11 @@ test_busypoll() -b${SERVER_IP} \ -m${MAX_EVENTS} \ -u${BUSY_POLL_USECS} \ - -P${PREFER_BUSY_POLL} \ + -P${prefer_busy_poll_value} \ -g${BUSY_POLL_BUDGET} \ -i${NSIM_SV_IFIDX} \ -s${suspend_value} \ + -t${napi_threaded_value} \ -o${out_file}& wait_local_port_listen nssv ${SERVER_PORT} tcp @@ -109,6 +115,15 @@ test_busypoll_with_suspend() return $? } +test_busypoll_with_napi_threaded() +{ + # Only enable napi threaded poll. Set suspend timeout and prefer busy + # poll to 0. + test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 + + return $? +} + ### ### Code start ### @@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then exit 1 fi +test_busypoll_with_napi_threaded +if [ $? 
-ne 0 ]; then + echo "test_busypoll_with_napi_threaded failed" + cleanup_ns + exit 1 +fi + echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 04c7ff577bb8..3a81f9c94795 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs; static uint16_t cfg_busy_poll_budget; static uint8_t cfg_prefer_busy_poll; -/* IRQ params */ +/* NAPI params */ static uint32_t cfg_defer_hard_irqs; static uint64_t cfg_gro_flush_timeout; static uint64_t cfg_irq_suspend_timeout; +static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; static void usage(const char *filepath) { error(1, 0, - "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>", filepath); } @@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv) if (argc <= 1) usage(argv[0]); - while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { /* most options take integer values, except o and b, so reduce * code duplication a bit for the common case by calling * strtoull here and leave bounds checking and casting per @@ -168,6 +169,12 @@ static void parse_opts(int argc, char **argv) cfg_ifindex = (int)tmp; break; + case 't': + if (tmp > 2) + error(1, ERANGE, "napi threaded poll value must be 0-2"); + + cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; + break; } } @@ -247,6 +254,9 @@ static void setup_queue(void) netdev_napi_set_req_set_irq_suspend_timeout(set_req, cfg_irq_suspend_timeout); + if (cfg_napi_threaded_poll) + netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); + if (netdev_napi_set(ys, set_req)) error(1, 0, "can't set NAPI params: %s\n", yerr.msg); diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index a94b73a53f72..a88f797c549a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ + fib6_ra_to_static" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1476,6 +1477,68 @@ ipv6_route_metrics_test() route_cleanup } +fib6_ra_to_static() +{ + setup + + echo + echo "Fib6 route promotion from RA-learned to static test" + set -e + + # ra6 is required for the test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Create a pair of veth devices to send a RA message from one + # device to another. 
+ $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. Make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return + + # Configure static address on the same prefix + $IP -6 address add 2001:12::dead/64 dev veth1 nodad + + # On-link route won't expire anymore, default route still owned by RA + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + # Send a second RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + sleep 1 + + # Expire is not back, on-link route is still static + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + $IP -6 address del 2001:12::dead/64 dev veth1 nodad + + # Expire is back, on-link route is now owned by RA again + check_rt_num 2 $($IP -6 route list |grep expires|wc -l) + + log_test $ret 0 "ipv6 promote RA route to static" + + set +e + + cleanup &> /dev/null +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -2798,6 +2861,7 @@ do ipv6_mpath_list) ipv6_mpath_list_test;; ipv4_mpath_balance) ipv4_mpath_balance_test;; ipv6_mpath_balance) ipv6_mpath_balance_test;; + fib6_ra_to_static) fib6_ra_to_static;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index 8c1597ebc2d3..e86d77946585 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -28,6 +28,7 @@ ALL_TESTS=" cfg_test fwd_test ctrl_test + disable_test " NUM_NETIFS=4 @@ -64,7 +65,10 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + local vlan_filtering=$1; shift + + ip link add name br0 type bridge \ + vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 bridge vlan add vid 10 dev br0 self bridge vlan add vid 20 dev br0 self @@ -118,7 +122,7 @@ setup_prepare() h1_create h2_create - switch_create + switch_create 1 } cleanup() @@ -1357,6 +1361,98 @@ ctrl_test() ctrl_mldv2_is_in_test } +check_group() +{ + local group=$1; shift + local vid=$1; shift + local should_fail=$1; shift + local when=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null | + grep -q "port $swp1" + check_err_fail "$should_fail" $? 
"$group seen $when snooping disable:" +} + +__disable_test() +{ + local vid=$1; shift + local what=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + RET=0 + + bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1 + bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp ff0e::3 \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2 + bridge mdb add dev br0 port "$swp1" grp ff0e::4 \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.1 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.2 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \ + "${vidkws[@]}" filter_mode exclude + + check_group ff0e::1 "$vid" 0 "before" + check_group ff0e::2 "$vid" 0 "before" + check_group ff0e::3 "$vid" 0 "before" + check_group ff0e::4 "$vid" 0 "before" + + check_group 239.1.1.1 "$vid" 0 "before" + check_group 239.1.1.2 "$vid" 0 "before" + check_group 239.1.1.3 "$vid" 0 "before" + check_group 239.1.1.4 "$vid" 0 "before" + + ip link set dev br0 type bridge mcast_snooping 0 + + check_group ff0e::1 "$vid" 0 "after" + check_group ff0e::2 "$vid" 0 "after" + check_group ff0e::3 "$vid" 1 "after" + check_group ff0e::4 "$vid" 1 "after" + + check_group 239.1.1.1 "$vid" 0 "after" + check_group 239.1.1.2 "$vid" 0 "after" + check_group 239.1.1.3 "$vid" 1 "after" + check_group 239.1.1.4 "$vid" 1 "after" + + log_test "$what: Flush after disable" + + ip link set dev br0 type bridge mcast_snooping 1 + sleep 10 +} + +disable_test() +{ + __disable_test 10 802.1q + + switch_destroy + switch_create 0 + setup_wait + + __disable_test 0 802.1d + + switch_destroy + switch_create 1 + setup_wait +} + if ! bridge mdb help 2>&1 | grep -q "flush"; then echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh deleted file mode 100755 index 4c5144c6f652..000000000000 --- a/tools/testing/selftests/net/gro.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly SERVER_MAC="aa:00:00:00:00:02" -readonly CLIENT_MAC="aa:00:00:00:00:01" -readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6" "ipip") -dev="" -test="all" -proto="ipv4" - -run_test() { - local server_pid=0 - local exit_code=0 - local protocol=$1 - local test=$2 - local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ - "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) - - setup_ns - # Each test is run 6 times to deflake, because given the receive timing, - # not all packets that should coalesce will be considered in the same flow - # on every try. - for tries in {1..6}; do - # Actual test starts here - ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ - 1>>log.txt & - server_pid=$! - sleep 0.5 # to allow for socket init - ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ - 1>>log.txt - wait "${server_pid}" - exit_code=$? 
- if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ - ${exit_code} -ne 0 ]]; then - echo "Ignoring errors due to slow environment" 1>&2 - exit_code=0 - fi - if [[ "${exit_code}" -eq 0 ]]; then - break; - fi - done - cleanup_ns - echo ${exit_code} -} - -run_all_tests() { - local failed_tests=() - for proto in "${PROTOS[@]}"; do - for test in "${TESTS[@]}"; do - echo "running test ${proto} ${test}" >&2 - exit_code=$(run_test $proto $test) - if [[ "${exit_code}" -ne 0 ]]; then - failed_tests+=("${proto}_${test}") - fi; - done; - done - if [[ ${#failed_tests[@]} -ne 0 ]]; then - echo "failed tests: ${failed_tests[*]}. \ - Please see log.txt for more logs" - exit 1 - else - echo "All Tests Succeeded!" - fi; -} - -usage() { - echo "Usage: $0 \ - [-i <DEV>] \ - [-t data|ack|flags|tcp|ip|large] \ - [-p <ipv4|ipv6>]" 1>&2; - exit 1; -} - -while getopts "i:t:p:" opt; do - case "${opt}" in - i) - dev="${OPTARG}" - ;; - t) - test="${OPTARG}" - ;; - p) - proto="${OPTARG}" - ;; - *) - usage - ;; - esac -done - -if [ -n "$dev" ]; then - source setup_loopback.sh -else - source setup_veth.sh -fi - -setup -trap cleanup EXIT -if [[ "${test}" == "all" ]]; then - run_all_tests -else - exit_code=$(run_test "${proto}" "${test}") - exit $exit_code -fi; diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 76e604e4810e..7bfeeb133705 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_queue_init(512, &ring, 0); if (ret) - error(1, ret, "io_uring: queue init"); + error(1, -ret, "io_uring: queue init"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) - error(1, ret, "io_uring: buffer registration"); + error(1, -ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { @@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) - error(1, ret, "submit"); + error(1, -ret, "submit"); if (cfg_cork) do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->user_data != NONZC_TAG && cqe->user_data != ZC_TAG) - error(1, -EINVAL, "invalid cqe->user_data"); + error(1, EINVAL, "invalid cqe->user_data"); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (compl_cqes <= 0) - error(1, -EINVAL, "notification mismatch"); + error(1, EINVAL, "notification mismatch"); compl_cqes--; i--; io_uring_cqe_seen(&ring); @@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol) } if (cqe->flags & IORING_CQE_F_MORE) { if (cqe->user_data != ZC_TAG) - error(1, cqe->res, "unexpected F_MORE"); + error(1, -cqe->res, "unexpected F_MORE"); compl_cqes++; } if (cqe->res >= 0) { packets++; bytes += cqe->res; } else if (cqe->res != -EAGAIN) { - error(1, cqe->res, "send failed"); + error(1, -cqe->res, "send failed"); } io_uring_cqe_seen(&ring); } @@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol) while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->flags & IORING_CQE_F_MORE) - error(1, 
-EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (!(cqe->flags & IORING_CQE_F_NOTIF)) - error(1, -EINVAL, "missing notif flag"); + error(1, EINVAL, "missing notif flag"); io_uring_cqe_seen(&ring); compl_cqes--; diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index ce795bc0a1af..5339f56329e1 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -8,6 +8,7 @@ CFLAGS += -I../../ TEST_FILES := \ ../../../../net/ynl \ ../../../../../Documentation/netlink/specs \ + ksft_setup_loopback.sh \ # end of TEST_FILES TEST_GEN_FILES := \ diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh new file mode 100755 index 000000000000..3defbb1919c5 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Setup script for running ksft tests over a real interface in loopback mode. +# This script replaces the historical setup_loopback.sh. It puts +# a (presumably) real hardware interface into loopback mode, creates macvlan +# interfaces on top and places them in a network namespace for isolation. +# +# NETIF env variable must be exported to indicate the real target device. +# Note that the test will override NETIF with one of the macvlans; the +# actual ksft test will only see the macvlans. +# +# Example use: +# export NETIF=eth0 +# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py + +if [ -z "$NETIF" ]; then + echo "Error: NETIF variable not set" + exit 1 +fi +if ! [ -d "/sys/class/net/$NETIF" ]; then + echo "Error: Can't find $NETIF, invalid netdevice" + exit 1 +fi + +# Save original settings for cleanup +readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs" +FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")" +readonly FLUSH_TIMEOUT +HARD_IRQS="$(< "${IRQ_PATH}")" +readonly HARD_IRQS + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +readonly SERVER_NS +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +readonly CLIENT_NS +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +# ksft expects addresses to communicate with remote +export LOCAL_V6=2001:db8:1::1 +export REMOTE_V6=2001:db8:1::2 + +cleanup() { + local exit_code=$? + + echo "Cleaning up..." + + # Remove macvlan interfaces and namespaces + ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true + ip netns del "${SERVER_NS}" 2>/dev/null || true + ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true + ip netns del "${CLIENT_NS}" 2>/dev/null || true + + # Disable loopback + ethtool -K "${NETIF}" loopback off 2>/dev/null || true + sleep 1 + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" + + exit $exit_code +} + +trap cleanup EXIT INT TERM + +# Enable loopback mode +echo "Enabling loopback on ${NETIF}..." +ethtool -K "${NETIF}" loopback on || { + echo "Failed to enable loopback mode" + exit 1 +} +# The interface may need time to get carrier back, but selftests +# will wait for carrier, so no need to wait / sleep here. 
+ +# Use timer on host to trigger the network stack +# Also disable device interrupt to not depend on NIC interrupt +# Reduce test flakiness caused by unexpected interrupts +echo 100000 >"${FLUSH_PATH}" +echo 50 >"${IRQ_PATH}" + +# Create server namespace with macvlan +ip netns add "${SERVER_NS}" +ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan +ip link set dev server netns "${SERVER_NS}" +ip -netns "${SERVER_NS}" link set dev server up +ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server +ip -netns "${SERVER_NS}" link set dev lo up + +# Create client namespace with macvlan +ip netns add "${CLIENT_NS}" +ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan +ip link set dev client netns "${CLIENT_NS}" +ip -netns "${CLIENT_NS}" link set dev client up +ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client +ip -netns "${CLIENT_NS}" link set dev lo up + +echo "Setup complete!" +echo " Device: ${NETIF}" +echo " Server NS: ${SERVER_NS}" +echo " Client NS: ${CLIENT_NS}" +echo "" + +# Setup environment variables for tests +export NETIF=server +export REMOTE_TYPE=netns +export REMOTE_ARGS="${CLIENT_NS}" + +# Run the command +ip netns exec "${SERVER_NS}" "$@" diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 97b7cf2b20eb..40f9ce307dd1 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -8,7 +8,8 @@ from .consts import KSRC from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ - ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \ + ksft_variants, KsftNamedVariant from .netns import NetNS, NetNSEnter from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ @@ -21,7 +22,7 @@ __all__ = ["KSRC", "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", - "ksft_run", "ksft_exit", + "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant", "NetNS", "NetNSEnter", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", "bpftool", "ip", "ethtool", "bpftrace", "rand_port", diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 83b1574f7719..531e7fa1b3ea 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 -import builtins import functools import inspect import signal import sys import time import traceback +from collections import namedtuple from .consts import KSFT_MAIN_NAME from .utils import global_defer_queue @@ -136,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): time.sleep(sleep) -def ktap_result(ok, cnt=1, case="", comment=""): +def ktap_result(ok, cnt=1, case_name="", comment=""): global KSFT_RESULT_ALL KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok @@ -146,8 +146,8 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "ok " res += str(cnt) + " " res += KSFT_MAIN_NAME - if case: - res += "." + str(case.__name__) + if case_name: + res += "." 
+ case_name if comment: res += " # " + comment print(res, flush=True) @@ -163,7 +163,7 @@ def ksft_flush_defer(): entry = global_defer_queue.pop() try: entry.exec_only() - except: + except Exception: ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") tb = traceback.format_exc() for line in tb.strip().split('\n'): @@ -171,6 +171,10 @@ def ksft_flush_defer(): KSFT_RESULT = False +KsftCaseFunction = namedtuple("KsftCaseFunction", + ['name', 'original_func', 'variants']) + + def ksft_disruptive(func): """ Decorator that marks the test as disruptive (e.g. the test @@ -181,11 +185,47 @@ def ksft_disruptive(func): @functools.wraps(func) def wrapper(*args, **kwargs): if not KSFT_DISRUPTIVE: - raise KsftSkipEx(f"marked as disruptive") + raise KsftSkipEx("marked as disruptive") return func(*args, **kwargs) return wrapper +class KsftNamedVariant: + """ Named string name + argument list tuple for @ksft_variants """ + + def __init__(self, name, *params): + self.params = params + self.name = name or "_".join([str(x) for x in self.params]) + + +def ksft_variants(params): + """ + Decorator defining the sets of inputs for a test. + The parameters will be included in the name of the resulting sub-case. + Parameters can be either single object, tuple or a KsftNamedVariant. + The argument can be a list or a generator. + + Example: + + @ksft_variants([ + (1, "a"), + (2, "b"), + KsftNamedVariant("three", 3, "c"), + ]) + def my_case(cfg, a, b): + pass # ... + + ksft_run(cases=[my_case], args=(cfg, )) + + Will generate cases: + my_case.1_a + my_case.2_b + my_case.three + """ + + return lambda func: KsftCaseFunction(func.__name__, func, params) + + def ksft_setup(env): """ Setup test framework global state from the environment. @@ -199,7 +239,7 @@ def ksft_setup(env): return False try: return bool(int(value)) - except: + except Exception: raise Exception(f"failed to parse {name}") if "DISRUPTIVE" in env: @@ -220,9 +260,13 @@ def _ksft_intr(signum, frame): ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") -def ksft_run(cases=None, globs=None, case_pfx=None, args=()): +def _ksft_generate_test_cases(cases, globs, case_pfx, args): + """Generate a flat list of (func, args, name) tuples""" + cases = cases or [] + test_cases = [] + # If using the globs method find all relevant functions if globs and case_pfx: for key, value in globs.items(): if not callable(value): @@ -232,6 +276,27 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + for func in cases: + if isinstance(func, KsftCaseFunction): + # Parametrized test - create case for each param + for param in func.variants: + if not isinstance(param, KsftNamedVariant): + if not isinstance(param, tuple): + param = (param, ) + param = KsftNamedVariant(None, *param) + + test_cases.append((func.original_func, + (*args, *param.params), + func.name + "." + param.name)) + else: + test_cases.append((func, args, func.__name__)) + + return test_cases + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args) + global term_cnt term_cnt = 0 prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) @@ -239,19 +304,19 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} print("TAP version 13", flush=True) - print("1.." + str(len(cases)), flush=True) + print("1.." 
+ str(len(test_cases)), flush=True) global KSFT_RESULT cnt = 0 stop = False - for case in cases: + for func, args, name in test_cases: KSFT_RESULT = True cnt += 1 comment = "" cnt_key = "" try: - case(*args) + func(*args) except KsftSkipEx as e: comment = "SKIP " + str(e) cnt_key = 'skip' @@ -268,12 +333,26 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): KSFT_RESULT = False cnt_key = 'fail' - ksft_flush_defer() + try: + ksft_flush_defer() + except BaseException as e: + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Exception|", line) + if isinstance(e, KeyboardInterrupt): + ksft_pr() + ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.") + ksft_pr(" Attempting to finish cleanup before exiting.") + ksft_pr(" Interrupt again to exit immediately.") + ksft_pr() + stop = True + # Flush was interrupted, try to finish the job best we can + ksft_flush_defer() if not cnt_key: cnt_key = 'pass' if KSFT_RESULT else 'fail' - ktap_result(KSFT_RESULT, cnt, case, comment=comment) + ktap_result(KSFT_RESULT, cnt, name, comment=comment) totals[cnt_key] += 1 if stop: diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index 1a8cbe9acc48..7c640ed64c0b 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -27,7 +27,7 @@ class NetdevSim: self.port_index = port_index self.ns = ns self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) - ret = ip("-j link show dev %s" % ifname, ns=ns) + ret = ip("-d -j link show dev %s" % ifname, ns=ns) self.dev = json.loads(ret.stdout)[0] self.ifindex = self.dev["ifindex"] diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index cb40ecef9456..106ee1f2df86 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -32,7 +32,7 @@ class cmd: Use bkg() instead to run a command in the background. """ def __init__(self, comm, shell=None, fail=True, ns=None, background=False, - host=None, timeout=5, ksft_wait=None): + host=None, timeout=5, ksft_ready=None, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -52,21 +52,25 @@ class cmd: # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. 
- pass_fds = () + pass_fds = [] env = os.environ.copy() if ksft_wait is not None: - rfd, ready_fd = os.pipe() wait_fd, self.ksft_term_fd = os.pipe() - pass_fds = (ready_fd, wait_fd, ) - env["KSFT_READY_FD"] = str(ready_fd) + pass_fds.append(wait_fd) env["KSFT_WAIT_FD"] = str(wait_fd) + ksft_ready = True # ksft_wait implies ready + if ksft_ready is not None: + rfd, ready_fd = os.pipe() + pass_fds.append(ready_fd) + env["KSFT_READY_FD"] = str(ready_fd) self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, stderr=subprocess.PIPE, pass_fds=pass_fds, env=env) if ksft_wait is not None: - os.close(ready_fd) os.close(wait_fd) + if ksft_ready is not None: + os.close(ready_fd) msg = fd_read_timeout(rfd, ksft_wait) os.close(rfd) if not msg: @@ -116,10 +120,10 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ def __init__(self, comm, shell=None, fail=None, ns=None, host=None, - exit_wait=False, ksft_wait=None): + exit_wait=False, ksft_ready=None, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, - ksft_wait=ksft_wait) + ksft_ready=ksft_ready, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait self._exit_wait = exit_wait self.check_fail = fail diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index c368fc045f4b..64f05229ab24 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -332,7 +332,7 @@ static __u16 csum_fold_helper(__u32 csum) } static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, - __u32 hdr_len) + unsigned long hdr_len) { char tmp_buff[MAX_ADJST_OFFSET]; __u32 buff_pos, udp_csum = 0; @@ -422,8 +422,9 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { struct udphdr *udph = NULL; __s32 *adjust_offset, *val; - __u32 key, hdr_len; + unsigned long hdr_len; void *offset_ptr; + __u32 key; __u8 tag; int ret; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index fc7e22b503d3..404a77bf366a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1072,6 +1072,8 @@ static void check_getpeername_connect(int fd) socklen_t salen = sizeof(ss); char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; + const char *iface; + size_t len; if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { perror("getpeername"); @@ -1081,7 +1083,13 @@ static void check_getpeername_connect(int fd) xgetnameinfo((struct sockaddr *)&ss, salen, a, sizeof(a), b, sizeof(b)); - if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + iface = strchr(cfg_host, '%'); + if (iface) + len = iface - cfg_host; + else + len = strlen(cfg_host) + 1; + + if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b)) fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, cfg_host, a, cfg_port, b); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 9b7b93f8eb0c..a6447f7a31fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -375,81 +375,75 @@ do_transfer() local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" local capopt="-i any -s 65535 -B 32768 ${capuser}" - ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + ip netns exec 
${listener_ns} tcpdump ${capopt} \ + -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & local cappid_listener=$! - ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & - local cappid_connector=$! + if [ ${listener_ns} != ${connector_ns} ]; then + ip netns exec ${connector_ns} tcpdump ${capopt} \ + -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + fi sleep 1 fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - fi - - local stat_synrx_last_l - local stat_ackrx_last_l - local stat_cookietx_last - local stat_cookierx_last - local stat_csum_err_s - local stat_csum_err_c - local stat_tcpfb_last_l - stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args $local_addr < "$sin" > "$sout" & + mptcp_lib_nstat_init "${connector_ns}" + fi + + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? 
+ if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + local stop stop=$(date +%s%3N) if $capture; then sleep 1 kill ${cappid_listener} - kill ${cappid_connector} + if [ ${listener_ns} != ${connector_ns} ]; then + kill ${cappid_connector} + fi fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out + mptcp_lib_nstat_get "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${connector_ns}" fi local duration duration=$((stop-start)) printf "(duration %05sms) " "${duration}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" echo cat "$capout" @@ -463,38 +457,38 @@ do_transfer() rets=$? local extra="" - local stat_synrx_now_l - local stat_ackrx_now_l - local stat_cookietx_now - local stat_cookierx_now - local stat_ooo_now - local stat_tcpfb_now_l - stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") - stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - expect_synrx=$((stat_synrx_last_l)) - expect_ackrx=$((stat_ackrx_last_l)) + local stat_synrx + local stat_ackrx + local stat_cookietx + local stat_cookierx + local stat_ooo + local stat_tcpfb + stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=0 + expect_ackrx=0 cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + expect_synrx=${connect_per_transfer} + expect_ackrx=${connect_per_transfer} fi - if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then - mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + if [ ${stat_synrx} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \ "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - if [ ${stat_ooo_now} -eq 0 ]; then - mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then + if [ ${stat_ooo} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \ "than 
expected (${expect_ackrx})" rets=1 else @@ -508,47 +502,45 @@ do_transfer() csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) - if [ $csum_err_s_nr -gt 0 ]; then - mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + if [ $csum_err_s -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]" rets=1 fi - local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) - if [ $csum_err_c_nr -gt 0 ]; then - mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + if [ $csum_err_c -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]" retc=1 fi fi - if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then mptcp_lib_pr_fail "unexpected fallback to TCP" rets=1 fi if [ $cookies -eq 2 ];then - if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + if [ $stat_cookietx -eq 0 ] ;then extra+=" WARN: CookieSent: did not advance" fi - if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + if [ $stat_cookierx -eq 0 ] ;then extra+=" WARN: CookieRecv: did not advance" fi else - if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then + if [ $stat_cookietx -gt 0 ] ;then extra+=" WARN: CookieSent: changed" fi - if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + if [ $stat_cookierx -gt 0 ] ;then extra+=" WARN: CookieRecv: changed" fi fi - if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + if [ ${stat_synrx} -gt ${expect_synrx} ]; then extra+=" WARN: SYNRX: expect ${expect_synrx}," - extra+=" got ${stat_synrx_now_l} (probably retransmissions)" + extra+=" got ${stat_synrx} (probably retransmissions)" fi - if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then extra+=" WARN: ACKRX: expect ${expect_ackrx}," - extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + extra+=" got ${stat_ackrx} (probably retransmissions)" fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 43f31f8d587f..b2e6e548f796 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -62,6 +62,7 @@ unset sflags unset fastclose unset fullmesh unset speed +unset bind_addr unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 @@ -645,6 +646,27 @@ wait_mpj() done } +wait_ll_ready() +{ + local ns="${1}" + + local i + for i in $(seq 50); do + ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | + grep -qw "tentative" || break + sleep 0.1 + done +} + +get_ll_addr() +{ + local ns="${1}" + local iface="${2}" + + ip -n "${ns}" -6 addr show dev "${iface}" scope link | + grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#' +} + kill_events_pids() { mptcp_lib_kill_wait $evts_ns1_pid @@ -951,6 +973,9 @@ do_transfer() local FAILING_LINKS=${FAILING_LINKS:-""} local fastclose=${fastclose:-""} local speed=${speed:-"fast"} + local bind_addr=${bind_addr:-"::"} + local listener_in="${sin}" + local connector_in="${cin}" port=$(get_port) :> "$cout" @@ -958,10 +983,8 @@ do_transfer() cond_start_capture ${listener_ns} - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n + 
mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" local extra_args if [ $speed = "fast" ]; then @@ -999,42 +1022,40 @@ do_transfer() extra_srv_args="$extra_args $extra_srv_args" if [ "$test_linkfail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & + listener_in="${sinfail}" fi + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + connector_in="${cinsent}" ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ - timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr > "$cout" & else + connector_in="${cinsent}" tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & fi local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr check_cestab $listener_ns $connector_ns @@ -1043,31 +1064,26 @@ do_transfer() wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + cond_stop_capture - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" return 1 fi - if [ "$test_linkfail" -gt 1 ];then - check_transfer $sinfail $cout "file received by client" $trunc_size - else - check_transfer $sin $cout "file received by client" $trunc_size - fi + check_transfer $listener_in $cout "file received by client" $trunc_size retc=$? 
- if [ "$test_linkfail" -eq 0 ];then - check_transfer $cin $sout "file received by server" $trunc_size - else - check_transfer $cinsent $sout "file received by server" $trunc_size - fi + check_transfer $connector_in $sout "file received by server" $trunc_size rets=$? [ $retc -eq 0 ] && [ $rets -eq 0 ] @@ -1136,12 +1152,20 @@ run_tests() do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } +_dump_stats() +{ + local ns="${1}" + local side="${2}" + + mptcp_lib_print_err "${side} ns stats (${ns2})" + mptcp_lib_pr_nstat "${ns}" + echo +} + dump_stats() { - echo Server ns stats - ip netns exec $ns1 nstat -as | grep Tcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep Tcp + _dump_stats "${ns1}" "Server" + _dump_stats "${ns2}" "Client" } chk_csum_nr() @@ -2952,7 +2976,11 @@ mixed_tests() pm_nl_add_endpoint $ns1 10.0.1.1 flags signal speed=slow \ run_tests $ns1 $ns2 dead:beef:2::1 - chk_join_nr 1 1 1 + if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then + chk_join_nr 0 0 0 + else + chk_join_nr 1 1 1 + fi fi # fullmesh still tries to create all the possibly subflows with @@ -3233,6 +3261,133 @@ add_addr_ports_tests() fi } +bind_tests() +{ + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v4, no join v4"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v6, no join v6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v4"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP 10.0.2.1 & + extra_bind=$! + + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v6"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP dead:beef:2::1 & + extra_bind=$! 
+ + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses: v6 LL case + if reset "multiple bind to allow joins v6 link-local routing"; then + local extra_bind ns1ll1 ns1ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + # it is not possible to connect to the announced LL addr without + # specifying the outgoing interface. + join_connect_err=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to v6 LL addresses: laminar + if reset "multiple bind to allow joins v6 link-local laminar" && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + local extra_bind ns1ll1 ns1ll2 ns2ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + ns2ll2="$(get_ll_addr $ns2 ns2eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2 + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi +} + syncookies_tests() { # single subflow, syncookies @@ -4192,6 +4347,7 @@ all_tests_sorted=( M@mixed_tests b@backup_tests p@add_addr_ports_tests + B@bind_tests k@syncookies_tests S@checksum_tests d@deny_join_id0_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index f4388900016a..5fea7e7df628 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -106,23 +106,32 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } -# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_nstat() { + local ns="${1}" + local hist="/tmp/${ns}.out" + + if [ -f "${hist}" ]; then + awk '$2 != 0 { print " "$0 }' "${hist}" + else + ip netns exec "${ns}" nstat -as | grep Tcp + fi +} + +# $1-2: listener/connector ns ; $3 port mptcp_lib_pr_err_stats() { local lns="${1}" local cns="${2}" local port="${3}" - local lstat="${4}" - local cstat="${5}" echo -en "${MPTCP_LIB_COLOR_RED}" { printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" - cat "${lstat}" + mptcp_lib_pr_nstat "${lns}" printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" - [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}" } 1>&2 echo -en "${MPTCP_LIB_COLOR_RESET}" } @@ -341,6 +350,19 @@ mptcp_lib_evts_get_info() { mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } +mptcp_lib_wait_timeout() { + local 
timeout_test="${1}" + local listener_ns="${2}" + local connector_ns="${3}" + local port="${4}" + shift 4 # rest are PIDs + + sleep "${timeout_test}" + mptcp_lib_print_err "timeout" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" + kill "${@}" 2>/dev/null +} + # $1: PID mptcp_lib_kill_wait() { [ "${1}" -eq 0 ] && return 0 @@ -376,14 +398,36 @@ mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } +mptcp_lib_nstat_init() { + local ns="${1}" + + rm -f "/tmp/${ns}."{nstat,out} + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n +} + +mptcp_lib_nstat_get() { + local ns="${1}" + + # filter out non-*TCP stats, and the rate (last column) + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz | + grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out" +} + # $1: ns, $2: MIB counter +# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available. +# If not, get the counter from nstat ignoring any history. mptcp_lib_get_counter() { local ns="${1}" local counter="${2}" + local hist="/tmp/${ns}.out" local count - count=$(ip netns exec "${ns}" nstat -asz "${counter}" | - awk 'NR==1 {next} {print $2}') + if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then + count=$(awk "/^${counter} / {print \$2; exit}" "${hist}") + else + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + fi if [ -z "${count}" ]; then mptcp_lib_fail_if_expected_feature "${counter} counter" return 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index f01989be6e9b..ab8bce06b262 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -169,41 +169,44 @@ do_transfer() cmsg+=",TCPINQ" fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ - ${local_addr} < "$sin" > "$sout" & + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" + + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ + ${local_addr} < "$sin" > "$sout" & local spid=$! - sleep 1 + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ - $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ + $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? 
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" print_title "Transfer ${ip:2}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ - "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" mptcp_lib_result_fail "transfer ${ip}" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 1903e8e84a31..806aaa7d2d61 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -155,48 +155,53 @@ do_transfer() sleep 1 fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat -n - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat -n - - timeout ${timeout_test} \ - ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ - 0.0.0.0 < "$sin" > "$sout" & + mptcp_lib_nstat_init "${ns3}" + mptcp_lib_nstat_init "${ns1}" + + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${ns3}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ - 10.0.3.3 < "$cin" > "$cout" & + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \ + "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi - NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ - nstat | grep Tcp > /tmp/${ns3}.out - NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ - nstat | grep Tcp > /tmp/${ns1}.out + mptcp_lib_nstat_get "${ns3}" + mptcp_lib_nstat_get "${ns1}" cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? 
- if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && + [ $timeout_pid -eq 0 ]; then printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" @@ -204,8 +209,7 @@ do_transfer() fi mptcp_lib_pr_fail "client exit code $retc, server $rets" - mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ - "/tmp/${ns3}.out" "/tmp/${ns1}.out" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" ls -l $sin $cout ls -l $cin $sout diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 87323942cb8a..e9ae1806ab07 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -211,7 +211,8 @@ make_connection() ip netns exec "$ns1" \ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 & local server_pid=$! - sleep 0.5 + + mptcp_lib_wait_local_port_listen "${ns1}" "${port}" # Run the client, transfer $file and stay connected to the server # to conduct tests diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index 45832df98295..a68bc882fa4e 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -127,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0 ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad +ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null for i in 0 1; do ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null @@ -153,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1 ip -net "$ns2" route add default via dead:2::1 ip -net "$nsr1" route add default via 192.168.10.2 +ip -6 -net "$nsr1" route add default via fee1:2::2 ip -net "$nsr2" route add default via 192.168.10.1 +ip -6 -net "$nsr2" route add default via fee1:2::1 ip netns exec "$nsr1" nft -f - <<EOF table inet filter { @@ -352,8 +356,9 @@ test_tcp_forwarding_ip() local nsa=$1 local nsb=$2 local pmtu=$3 - local dstip=$4 - local dstport=$5 + local proto=$4 + local dstip=$5 + local dstport=$6 local lret=0 local socatc local socatl @@ -363,12 +368,14 @@ test_tcp_forwarding_ip() infile="$nsin_small" fi - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \ + TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" & lpid=$! busywait 1000 listener_ready - timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \ + TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out" socatc=$? wait $lpid @@ -394,8 +401,11 @@ test_tcp_forwarding_ip() test_tcp_forwarding() { local pmtu="$3" + local proto="$4" + local dstip="$5" + local dstport="$6" - test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" return $? 
} @@ -403,6 +413,9 @@ test_tcp_forwarding() test_tcp_forwarding_set_dscp() { local pmtu="$3" + local proto="$4" + local dstip="$5" + local dstport="$6" ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { @@ -413,7 +426,7 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" check_dscp "dscp_ingress" "$pmtu" ip netns exec "$nsr1" nft delete table netdev dscpmangle @@ -430,7 +443,7 @@ table netdev dscpmangle { } EOF if [ $? -eq 0 ]; then - test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" check_dscp "dscp_egress" "$pmtu" ip netns exec "$nsr1" nft delete table netdev dscpmangle @@ -441,7 +454,7 @@ fi # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 # counters should have seen packets (before and after ft offload kicks in). ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 - test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport" check_dscp "dscp_fwd" "$pmtu" } @@ -455,7 +468,7 @@ test_tcp_forwarding_nat() [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)" - test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345 + test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345 lret=$? if [ "$lret" -eq 0 ] ; then @@ -465,7 +478,7 @@ test_tcp_forwarding_nat() echo "PASS: flow offload for ns1/ns2 with masquerade $what" fi - test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666 + test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666 lret=$? if [ "$pmtu" -eq 1 ] ;then check_counters "flow offload for ns1/ns2 with dnat $what" @@ -487,7 +500,7 @@ make_file "$nsin_small" "$filesize_small" # Due to MTU mismatch in both directions, all packets (except small packets like pure # acks) have to be handled by normal forwarding path. Therefore, packet counters # are not checked. -if test_tcp_forwarding "$ns1" "$ns2" 0; then +if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -495,6 +508,14 @@ else ret=1 fi +if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then + echo "PASS: IPv6 flow offloaded for ns1/ns2" +else + echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + # delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. @@ -520,7 +541,7 @@ table ip nat { EOF check_dscp "dscp_none" "0" -if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2 exit 0 fi @@ -546,7 +567,7 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null ip netns exec "$ns2" nft reset counters table inet filter >/dev/null -if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2 exit 0 fi @@ -558,6 +579,73 @@ if ! 
test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then ip netns exec "$nsr1" nft list ruleset fi +# IPIP tunnel test: +# Add IPIP tunnel interfaces and check flowtable acceleration. +test_ipip() { +if ! ip -net "$nsr1" link add name tun0 type ipip \ + local 192.168.10.1 remote 192.168.10.2 >/dev/null;then + echo "SKIP: could not add ipip tunnel" + [ "$ret" -eq 0 ] && ret=$ksft_skip + return +fi +ip -net "$nsr1" link set tun0 up +ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0 +ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null + +ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1 +ip -net "$nsr2" link set tun0 up +ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0 +ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null + +ip -net "$nsr1" route change default via 192.168.100.2 +ip -net "$nsr2" route change default via 192.168.100.1 +ip -net "$ns2" route add default via 10.0.2.1 + +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept' +ip netns exec "$nsr1" nft -a insert rule inet filter forward \ + 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept' + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then + echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# Create vlan tagged devices for IPIP traffic. +ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10 +ip -net "$nsr1" link set veth1.10 up +ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10 +ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept' +ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2 +ip -net "$nsr1" link set tun1 up +ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1 +ip -net "$nsr1" route change default via 192.168.200.2 +ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept' + +ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10 +ip -net "$nsr2" link set veth0.10 up +ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10 +ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null +ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1 +ip -net "$nsr2" link set tun1 up +ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1 +ip -net "$nsr2" route change default via 192.168.200.1 +ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then + echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + +# Restore the previous configuration +ip -net "$nsr1" route change default via 192.168.10.2 +ip -net "$nsr2" route change default via 192.168.10.1 +ip -net "$ns2" route del default via 10.0.2.1 +} + # Another test: # Add bridge interface br0 to Router1, with NAT enabled. 
test_bridge() { @@ -643,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad ip -net "$nsr1" link set up dev veth0 } +test_ipip + test_bridge KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) @@ -683,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2" 1; then +if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -691,6 +781,14 @@ else ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 fi +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + check_counters "IPv6 ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2" + ip netns exec "$nsr1" nft list ruleset 1>&2 + ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 +fi + if [ "$1" = "" ]; then low=1280 mtu=$((65536 - low)) diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a85..b282d1785c9b 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 7618ebe528a4..679b6c77ace7 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -143,6 +143,7 @@ TEST(dump_extack) EXPECT_EQ(n, -1); EXPECT_EQ(errno, ENOBUFS); + ret = NO_CTRL; for (i = 0; i < cnt; i++) { struct ext_ack ea = {}; diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt new file mode 100644 index 000000000000..47550df124ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test SYN+ACK RTX with 1s RTO. +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000` + +// +// Test 1: TFO SYN+ACK +// + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 + + +// +// Test 2: non-TFO SYN+ACK +// + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop> + +0 > S. 
0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt index b2b2cdf27e20..454441e7ecff 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -1,6 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Test that we correctly skip zero-length IOVs. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt index 183051ba0cae..6882b8240a8a 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -23,14 +23,16 @@ // install a qdisc dropping all packets +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 // When qdisc is congested we retry every 500ms // (TCP_RESOURCE_PROBE_INTERVAL) and therefore // we retry 6 times before hitting 3s timeout. // First verify that the connection is alive: -+3.250 write(4, ..., 24) = 24 ++3 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: - +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) ++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) +0 %{ assert tcpi_probes == 6, tcpi_probes; \ assert tcpi_backoff == 0, tcpi_backoff }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt index a82c8899d36b..0a0700afdaa3 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -4,6 +4,8 @@ // send a packet with MSG_ZEROCOPY and receive the notification ID // repeat and verify IDs are consecutive +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt index c01915e7f4a1..df91675d2991 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -3,6 +3,8 @@ // // send multiple packets, then read one range of all notifications. 
+--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt index 6509882932e9..2963cfcb14df 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Minimal client-side zerocopy test +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt index 2cd78755cb2a..ea0c2fa73c2d 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -7,6 +7,8 @@ // First send on a closed socket and wait for (absent) notification. // Then connect and send and verify that notification nr. is zero. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt index 7671c20e01cf..4df978a9b82e 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -7,6 +7,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt index fadc480fdb7f..36b6edc4858c 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -8,6 +8,9 @@ // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR // is correctly fired only once, when EPOLLET is set. send another packet with // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt index 5bfa0d1d2f4a..1bea6f3b4558 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -8,6 +8,9 @@ // is correctly fired only once, when EPOLLONESHOT is set. send another packet // with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and // confirm that EPOLLERR is correctly set. 
+ +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt index 4a73bbf46961..e27c21ff5d18 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -8,6 +8,8 @@ // one will have no data in the initial send. On return 0 the // zerocopy notification counter is not incremented. Verify this too. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` // Send a FastOpen request, no cookie yet so no data in SYN diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt index 36086c5877ce..b1fa77c77dfa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -4,6 +4,8 @@ // send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the // kernel returns the notification ID. +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt index 672f817faca0..2f5317d0a9fa 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -7,6 +7,8 @@ // because each iovec element becomes a frag // 3) the PSH bit is set on an skb when it runs out of fragments +--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt index a9a1ac0aea4f..9d5272c6b207 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -4,6 +4,8 @@ // verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy // packets of all sizes, including the smallest payload, 1B. 
+--send_omit_free // do not reuse send buffers with zerocopy + `./defaults.sh` 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 163a084d525d..248c2b91fe42 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -8,6 +8,7 @@ ALL_TESTS=" kci_test_polrouting kci_test_route_get kci_test_addrlft + kci_test_addrlft_route_cleanup kci_test_promote_secondaries kci_test_tc kci_test_gre @@ -323,6 +324,25 @@ kci_test_addrlft() end_test "PASS: preferred_lft addresses have expired" } +kci_test_addrlft_route_cleanup() +{ + local ret=0 + local test_addr="2001:db8:99::1/64" + local test_prefix="2001:db8:99::/64" + + run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300 + run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy" + run_cmd ip -6 addr del $test_addr dev "$devdummy" + run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy" + + if [ $ret -ne 0 ]; then + end_test "FAIL: route not cleaned up when address with valid_lft deleted" + return 1 + fi + + end_test "PASS: route cleaned up when address with valid_lft deleted" +} + kci_test_promote_secondaries() { run_cmd ifconfig "$devdummy" diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh deleted file mode 100644 index 2070b57849de..000000000000 --- a/tools/testing/selftests/net/setup_loopback.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" -readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" -readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" -readonly HARD_IRQS="$(< ${IRQ_PATH})" -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -netdev_check_for_carrier() { - local -r dev="$1" - - for i in {1..5}; do - carrier="$(cat /sys/class/net/${dev}/carrier)" - if [[ "${carrier}" -ne 1 ]] ; then - echo "carrier not ready yet..." >&2 - sleep 1 - else - echo "carrier ready" >&2 - break - fi - done - echo "${carrier}" -} - -# Assumes that there is no existing ipvlan device on the physical device -setup_loopback_environment() { - local dev="$1" - - # Fail hard if cannot turn on loopback mode for current NIC - ethtool -K "${dev}" loopback on || exit 1 - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_macvlan_ns(){ - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - local -r addr="$5" - - ip link add link "${link_dev}" dev "${ns_dev}" \ - address "${ns_mac}" type macvlan - exit_code=$? 
- if [[ "${exit_code}" -ne 0 ]]; then - echo "setup_macvlan_ns failed" - exit $exit_code - fi - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - ip link set dev "${ns_dev}" netns "${ns_name}" - ip -netns "${ns_name}" link set dev "${ns_dev}" up - if [[ -n "${addr}" ]]; then - ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" - fi - - sleep 1 -} - -cleanup_macvlan_ns(){ - while (( $# >= 2 )); do - ns_name="$1" - ns_dev="$2" - ip -netns "${ns_name}" link del dev "${ns_dev}" - ip netns del "${ns_name}" - shift 2 - done -} - -cleanup_loopback(){ - local -r dev="$1" - - ethtool -K "${dev}" loopback off - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_interrupt() { - # Use timer on host to trigger the network stack - # Also disable device interrupt to not depend on NIC interrupt - # Reduce test flakiness caused by unexpected interrupts - echo 100000 >"${FLUSH_PATH}" - echo 50 >"${IRQ_PATH}" -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - cleanup_macvlan_ns ${server_ns} server ${client_ns} client -} - -setup() { - setup_loopback_environment "${dev}" - setup_interrupt -} - -cleanup() { - cleanup_loopback "${dev}" - - echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" - echo "${HARD_IRQS}" >"${IRQ_PATH}" -} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh deleted file mode 100644 index 152bf4c65747..000000000000 --- a/tools/testing/selftests/net/setup_veth.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -setup_veth_ns() { - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" - echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs" - ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 - ip -netns "${ns_name}" link set dev "${ns_dev}" up - - ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - ip link add name server type veth peer name client - - setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - local ns_name - - for ns_name in ${client_ns} ${server_ns}; do - [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" - done -} - -setup() { - # no global init setup step needed - : -} - -cleanup() { - cleanup_ns -} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 8457b7ccbc09..b76df1efc2ef 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { - const char *reason; + const char *reason = NULL; ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5c6d8215021c..da1b50b30719 100644 --- 
a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2856,6 +2856,147 @@ TEST_F(tls_err, oob_pressure) EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); } +/* + * Parse a stream of TLS records and ensure that each record respects + * the specified @max_payload_len. + */ +static size_t parse_tls_records(struct __test_metadata *_metadata, + const __u8 *rx_buf, int rx_len, int overhead, + __u16 max_payload_len) +{ + const __u8 *rec = rx_buf; + size_t total_plaintext_rx = 0; + const __u8 rec_header_len = 5; + + while (rec < rx_buf + rx_len) { + __u16 record_payload_len; + __u16 plaintext_len; + + /* Sanity check that it's a TLS header for application data */ + ASSERT_EQ(rec[0], 23); + ASSERT_EQ(rec[1], 0x3); + ASSERT_EQ(rec[2], 0x3); + + memcpy(&record_payload_len, rec + 3, 2); + record_payload_len = ntohs(record_payload_len); + ASSERT_GE(record_payload_len, overhead); + + plaintext_len = record_payload_len - overhead; + total_plaintext_rx += plaintext_len; + + /* Plaintext must not exceed the specified limit */ + ASSERT_LE(plaintext_len, max_payload_len); + rec += rec_header_len + record_payload_len; + } + + return total_plaintext_rx; +} + +TEST(tls_12_tx_max_payload_len) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 limit = 128; + __u16 opt = 0; + unsigned int optlen = sizeof(opt); + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, 0); + + ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen); + EXPECT_EQ(ret, 0); + EXPECT_EQ(limit, opt); + EXPECT_EQ(optlen, sizeof(limit)); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx)); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + limit); + + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + +TEST(tls_12_tx_max_payload_len_open_rec) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 tx_partial = 256; + __u16 og_limit = 512, limit = 128; + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit, + sizeof(og_limit)); + ASSERT_EQ(ret, 0); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial); + + /* + * Changing the payload limit with a pending open record should + * not be allowed. 
+ */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBUSY); + + ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR), + sizeof(tx) - tx_partial); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + og_limit); + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh deleted file mode 100755 index 8ff172f7bb1b..000000000000 --- a/tools/testing/selftests/net/toeplitz.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping -# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu -# ('-rps <rps_map>') -# -# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, -# which is a driver-specific encoding. -# -# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ -# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] - -source setup_loopback.sh -readonly SERVER_IP4="192.168.1.200/24" -readonly SERVER_IP6="fda8::1/64" -readonly SERVER_MAC="aa:00:00:00:00:02" - -readonly CLIENT_IP4="192.168.1.100/24" -readonly CLIENT_IP6="fda8::2/64" -readonly CLIENT_MAC="aa:00:00:00:00:01" - -PORT=8000 -KEY="$(</proc/sys/net/core/netdev_rss_key)" -TEST_RSS=false -RPS_MAP="" -PROTO_FLAG="" -IP_FLAG="" -DEV="eth0" - -# Return the number of rxqs among which RSS is configured to spread packets. -# This is determined by reading the RSS indirection table using ethtool. -get_rss_cfg_num_rxqs() { - echo $(ethtool -x "${DEV}" | - grep -E [[:space:]]+[0-9]+:[[:space:]]+ | - cut -d: -f2- | - awk '{$1=$1};1' | - tr ' ' '\n' | - sort -u | - wc -l) -} - -# Return a list of the receive irq handler cpus. -# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. -# Reads /sys/kernel/irq/ in order, so algorithm depends on -# irq_{rxq-0} < irq_{rxq-1}, etc. -get_rx_irq_cpus() { - CPUS="" - # sort so that irq 2 is read before irq 10 - SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) - # Consider only as many queues as RSS actually uses. We assume that - # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). 
- RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) - RXQ_COUNT=0 - - for i in ${SORTED_IRQS} - do - [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break - # lookup relevant IRQs by action name - [[ -e "$i/actions" ]] || continue - cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue - irqname=$(<"$i/actions") - - # does the IRQ get called - irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') - [[ -n "${irqcount}" ]] || continue - - # lookup CPU - irq=$(basename "$i") - cpu=$(cat "/proc/irq/$irq/smp_affinity_list") - - if [[ -z "${CPUS}" ]]; then - CPUS="${cpu}" - else - CPUS="${CPUS},${cpu}" - fi - RXQ_COUNT=$((RXQ_COUNT+1)) - done - - echo "${CPUS}" -} - -get_disable_rfs_cmd() { - echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" -} - -get_set_rps_bitmaps_cmd() { - CMD="" - for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus - do - CMD="${CMD} echo $1 > ${i};" - done - - echo "${CMD}" -} - -get_disable_rps_cmd() { - echo "$(get_set_rps_bitmaps_cmd 0)" -} - -die() { - echo "$1" - exit 1 -} - -check_nic_rxhash_enabled() { - local -r pattern="receive-hashing:\ on" - - ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" -} - -parse_opts() { - local prog=$0 - shift 1 - - while [[ "$1" =~ "-" ]]; do - if [[ "$1" = "-irq_prefix" ]]; then - shift - IRQ_PATTERN="^$1-[0-9]*$" - elif [[ "$1" = "-u" || "$1" = "-t" ]]; then - PROTO_FLAG="$1" - elif [[ "$1" = "-4" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP4}" - CLIENT_IP="${CLIENT_IP4}" - elif [[ "$1" = "-6" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP6}" - CLIENT_IP="${CLIENT_IP6}" - elif [[ "$1" = "-rss" ]]; then - TEST_RSS=true - elif [[ "$1" = "-rps" ]]; then - shift - RPS_MAP="$1" - elif [[ "$1" = "-i" ]]; then - shift - DEV="$1" - else - die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ - [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" - fi - shift - done -} - -setup() { - setup_loopback_environment "${DEV}" - - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" $server_ns server \ - "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" $client_ns client \ - "${CLIENT_MAC}" "${CLIENT_IP}" -} - -cleanup() { - cleanup_macvlan_ns $server_ns server $client_ns client - cleanup_loopback "${DEV}" -} - -parse_opts $0 $@ - -setup -trap cleanup EXIT - -check_nic_rxhash_enabled - -# Actual test starts here -if [[ "${TEST_RSS}" = true ]]; then - # RPS/RFS must be disabled because they move packets between cpus, - # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. - eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -C "$(get_rx_irq_cpus)" -s -v & -elif [[ ! -z "${RPS_MAP}" ]]; then - eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -r "0x${RPS_MAP}" -s -v & -else - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & -fi - -server_pid=$! - -ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ - "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & - -client_pid=$! - -wait "${server_pid}" -exit_code=$? -kill -9 "${client_pid}" -if [[ "${exit_code}" -eq 0 ]]; then - echo "Test Succeeded!" 
-fi -exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh deleted file mode 100755 index 2fef34f4aba1..000000000000 --- a/tools/testing/selftests/net/toeplitz_client.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# A simple program for generating traffic for the toeplitz test. -# -# This program sends packets periodically for, conservatively, 20 seconds. The -# intent is for the calling program to kill this program once it is no longer -# needed, rather than waiting for the 20 second expiration. - -send_traffic() { - expiration=$((SECONDS+20)) - while [[ "${SECONDS}" -lt "${expiration}" ]] - do - if [[ "${PROTO}" == "-u" ]]; then - echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" - else - echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" - fi - sleep 0.001 - done -} - -PROTO=$1 -IPVER=$2 -ADDR=$3 -PORT=$4 - -send_traffic diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index dbb34c7e09ce..a7c6ab8a0347 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -36,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -59,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -298,6 +329,144 @@ run_traceroute6_vrf() } ################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. 
+ +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? 
"Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + +################################################################################ # traceroute test # # Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when @@ -438,14 +607,157 @@ run_traceroute_vrf() } ################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? 
"Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? "Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" + return + fi + + setup_traceroute_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. 
+ traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext +} + +################################################################################ # Run tests run_tests() { run_traceroute6 run_traceroute6_vrf + run_traceroute6_ext run_traceroute run_traceroute_vrf + run_traceroute_ext } ################################################################################ @@ -462,6 +774,7 @@ done require_command traceroute6 require_command traceroute +require_command jq run_tests diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index dae91eb97d69..bcc14688661d 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -217,7 +217,7 @@ static void print_timestamp_usr(void) static void print_timestamp(struct scm_timestamping *tss, int tstype, int tskey, int payload_len) { - const char *tsname; + const char *tsname = NULL; validate_key(tskey, tstype); diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 0091bcd91c2c..47de27fd4f90 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -1005,5 +1005,33 @@ "teardown": [ "$TC qdisc del dev $DUMMY clsact" ] + }, + { + "id": "4366", + "name": "CAKE with QFQ Parent - CAKE enqueue with packets dropping", + "category": [ + "qdisc", + "cake", + "netem" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup":[ + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 1024", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: cake memlimit 9", + "$TC filter add dev $DUMMY protocol ip parent 1: prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "$TC qdisc replace dev $DUMMY parent 1:1 handle 3: netem delay 0ms" + ], + "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc qfq 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 6b8123c12a7a..f8fa102a627f 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -836,56 +836,70 @@ static int ublk_process_io(struct ublk_thread *t) return reapped; } -static void ublk_thread_set_sched_affinity(const struct ublk_thread *t, - cpu_set_t *cpuset) -{ - if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) - ublk_err("ublk dev %u thread %u set affinity failed", - t->dev->dev_info.dev_id, t->idx); -} - struct ublk_thread_info { struct ublk_dev *dev; + pthread_t thread; unsigned idx; sem_t *ready; cpu_set_t *affinity; unsigned long long extra_flags; }; -static void *ublk_io_handler_fn(void *data) +static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info) { - struct ublk_thread_info *info = data; - struct ublk_thread *t = &info->dev->threads[info->idx]; + if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) < 0) + ublk_err("ublk dev %u thread %u set affinity failed", + info->dev->dev_info.dev_id, info->idx); +} + +static __attribute__((noinline)) int 
__ublk_io_handler_fn(struct ublk_thread_info *info) +{ + struct ublk_thread t = { + .dev = info->dev, + .idx = info->idx, + }; int dev_id = info->dev->dev_info.dev_id; int ret; - t->dev = info->dev; - t->idx = info->idx; - - ret = ublk_thread_init(t, info->extra_flags); + ret = ublk_thread_init(&t, info->extra_flags); if (ret) { ublk_err("ublk dev %d thread %u init failed\n", - dev_id, t->idx); - return NULL; + dev_id, t.idx); + return ret; } - /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ - if (info->affinity) - ublk_thread_set_sched_affinity(t, info->affinity); sem_post(info->ready); ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n", - gettid(), dev_id, t->idx); + gettid(), dev_id, t.idx); /* submit all io commands to ublk driver */ - ublk_submit_fetch_commands(t); + ublk_submit_fetch_commands(&t); do { - if (ublk_process_io(t) < 0) + if (ublk_process_io(&t) < 0) break; } while (1); ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n", - gettid(), dev_id, t->idx); - ublk_thread_deinit(t); + gettid(), dev_id, t.idx); + ublk_thread_deinit(&t); + return 0; +} + +static void *ublk_io_handler_fn(void *data) +{ + struct ublk_thread_info *info = data; + + /* + * IO perf is sensitive with queue pthread affinity on NUMA machine + * + * Set sched_affinity at beginning, so following allocated memory/pages + * could be CPU/NUMA aware. + */ + if (info->affinity) + ublk_thread_set_sched_affinity(info); + + __ublk_io_handler_fn(info); + return NULL; } @@ -983,14 +997,13 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) */ if (dev->nthreads == dinfo->nr_hw_queues) tinfo[i].affinity = &affinity_buf[i]; - pthread_create(&dev->threads[i].thread, NULL, + pthread_create(&tinfo[i].thread, NULL, ublk_io_handler_fn, &tinfo[i]); } for (i = 0; i < dev->nthreads; i++) sem_wait(&ready); - free(tinfo); free(affinity_buf); /* everything is fine now, start us */ @@ -1013,7 +1026,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) /* wait until we are terminated */ for (i = 0; i < dev->nthreads; i++) - pthread_join(dev->threads[i].thread, &thread_ret); + pthread_join(tinfo[i].thread, &thread_ret); + free(tinfo); fail: for (i = 0; i < dinfo->nr_hw_queues; i++) ublk_queue_deinit(&dev->q[i]); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 5e55484fb0aa..fe42705c6d42 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -175,23 +175,20 @@ struct ublk_queue { struct ublk_thread { struct ublk_dev *dev; - struct io_uring ring; - unsigned int cmd_inflight; - unsigned int io_inflight; - - pthread_t thread; unsigned idx; #define UBLKS_T_STOPPING (1U << 0) #define UBLKS_T_IDLE (1U << 1) unsigned state; + unsigned int cmd_inflight; + unsigned int io_inflight; + struct io_uring ring; }; struct ublk_dev { struct ublk_tgt tgt; struct ublksrv_ctrl_dev_info dev_info; struct ublk_queue q[UBLK_MAX_QUEUES]; - struct ublk_thread threads[UBLK_MAX_THREADS]; unsigned nthreads; unsigned per_io_tasks; diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index 8ceeb8a7894f..c7b270dd77a9 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -7,6 +7,8 @@ # * virtme-ng # * busybox-static (used by virtme-ng) # * qemu (used by virtme-ng) +# +# shellcheck disable=SC2317,SC2119 readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" 
readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../) @@ -22,8 +24,9 @@ readonly SSH_HOST_PORT=2222 readonly VSOCK_CID=1234 readonly WAIT_PERIOD=3 readonly WAIT_PERIOD_MAX=60 -readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX )) -readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid) +readonly WAIT_QEMU=5 +readonly PIDFILE_TEMPLATE=/tmp/vsock_vmtest_XXXX.pid +declare -A PIDFILES # virtme-ng offers a netdev for ssh when using "--ssh", but we also need a # control port forwarded for vsock_test. Because virtme-ng doesn't support @@ -33,12 +36,6 @@ readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid) # add the kernel cmdline options that virtme-init uses to setup the interface. readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}" readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}" -readonly QEMU_OPTS="\ - -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \ - -device virtio-net-pci,netdev=n0 \ - -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \ - --pidfile ${QEMU_PIDFILE} \ -" readonly KERNEL_CMDLINE="\ virtme.dhcp net.ifnames=0 biosdevname=0 \ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ @@ -51,6 +48,8 @@ readonly TEST_DESCS=( "Run vsock_test using the loopback transport in the VM." ) +readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback) + VERBOSE=0 usage() { @@ -84,21 +83,33 @@ die() { exit "${KSFT_FAIL}" } +check_result() { + local rc arg + + rc=$1 + arg=$2 + + cnt_total=$(( cnt_total + 1 )) + + if [[ ${rc} -eq ${KSFT_PASS} ]]; then + cnt_pass=$(( cnt_pass + 1 )) + echo "ok ${cnt_total} ${arg}" + elif [[ ${rc} -eq ${KSFT_SKIP} ]]; then + cnt_skip=$(( cnt_skip + 1 )) + echo "ok ${cnt_total} ${arg} # SKIP" + elif [[ ${rc} -eq ${KSFT_FAIL} ]]; then + cnt_fail=$(( cnt_fail + 1 )) + echo "not ok ${cnt_total} ${arg} # exit=${rc}" + fi +} + vm_ssh() { ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@" return $? } cleanup() { - if [[ -s "${QEMU_PIDFILE}" ]]; then - pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1 - fi - - # If failure occurred during or before qemu start up, then we need - # to clean this up ourselves. 
- if [[ -e "${QEMU_PIDFILE}" ]]; then - rm "${QEMU_PIDFILE}" - fi + terminate_pidfiles "${!PIDFILES[@]}" } check_args() { @@ -147,7 +158,7 @@ check_vng() { local version local ok - tested_versions=("1.33" "1.36") + tested_versions=("1.33" "1.36" "1.37") version="$(vng --version)" ok=0 @@ -188,10 +199,37 @@ handle_build() { popd &>/dev/null } +create_pidfile() { + local pidfile + + pidfile=$(mktemp "${PIDFILE_TEMPLATE}") + PIDFILES["${pidfile}"]=1 + + echo "${pidfile}" +} + +terminate_pidfiles() { + local pidfile + + for pidfile in "$@"; do + if [[ -s "${pidfile}" ]]; then + pkill -SIGTERM -F "${pidfile}" > /dev/null 2>&1 + fi + + if [[ -e "${pidfile}" ]]; then + rm -f "${pidfile}" + fi + + unset "PIDFILES[${pidfile}]" + done +} + vm_start() { + local pidfile=$1 local logfile=/dev/null local verbose_opt="" local kernel_opt="" + local qemu_opts="" local qemu qemu=$(command -v "${QEMU}") @@ -201,6 +239,13 @@ vm_start() { logfile=/dev/stdout fi + qemu_opts="\ + -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \ + -device virtio-net-pci,netdev=n0 \ + -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \ + --pidfile ${pidfile} + " + if [[ "${BUILD}" -eq 1 ]]; then kernel_opt="${KERNEL_CHECKOUT}" fi @@ -209,16 +254,14 @@ vm_start() { --run \ ${kernel_opt} \ ${verbose_opt} \ - --qemu-opts="${QEMU_OPTS}" \ + --qemu-opts="${qemu_opts}" \ --qemu="${qemu}" \ --user root \ --append "${KERNEL_CMDLINE}" \ --rw &> ${logfile} & - if ! timeout ${WAIT_TOTAL} \ - bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then - die "failed to boot VM" - fi + timeout "${WAIT_QEMU}" \ + bash -c 'while [[ ! -s '"${pidfile}"' ]]; do sleep 1; done; exit 0' } vm_wait_for_ssh() { @@ -251,9 +294,11 @@ wait_for_listener() # for tcp protocol additionally check the socket state [ "${protocol}" = "tcp" ] && pattern="${pattern}0A" + for i in $(seq "${max_intervals}"); do - if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \ - grep -q "${pattern}"; then + if awk -v pattern="${pattern}" \ + 'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \ + /proc/net/"${protocol}"*; then break fi sleep "${interval}" @@ -270,113 +315,196 @@ EOF } host_wait_for_listener() { - wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" + local port=$1 + + wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" } -__log_stdin() { - cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +vm_vsock_test() { + local host=$1 + local cid=$2 + local port=$3 + local rc + + # log output and use pipefail to respect vsock_test errors + set -o pipefail + if [[ "${host}" != server ]]; then + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host="${host}" \ + --peer-cid="${cid}" \ + --control-port="${port}" \ + 2>&1 | log_guest + rc=$? + else + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --peer-cid="${cid}" \ + --control-port="${port}" \ + 2>&1 | log_guest & + rc=$? + + if [[ $rc -ne 0 ]]; then + set +o pipefail + return $rc + fi + + vm_wait_for_listener "${port}" + rc=$? + fi + set +o pipefail + + return $rc } -__log_args() { - echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +host_vsock_test() { + local host=$1 + local cid=$2 + local port=$3 + local rc + + # log output and use pipefail to respect vsock_test errors + set -o pipefail + if [[ "${host}" != server ]]; then + ${VSOCK_TEST} \ + --mode=client \ + --peer-cid="${cid}" \ + --control-host="${host}" \ + --control-port="${port}" 2>&1 | log_host + rc=$? 
+ else + ${VSOCK_TEST} \ + --mode=server \ + --peer-cid="${cid}" \ + --control-port="${port}" 2>&1 | log_host & + rc=$? + + if [[ $rc -ne 0 ]]; then + set +o pipefail + return $rc + fi + + host_wait_for_listener "${port}" + rc=$? + fi + set +o pipefail + + return $rc } log() { - local prefix="$1" + local redirect + local prefix - shift - local redirect= if [[ ${VERBOSE} -eq 0 ]]; then redirect=/dev/null else redirect=/dev/stdout fi + prefix="${LOG_PREFIX:-}" + if [[ "$#" -eq 0 ]]; then - __log_stdin | tee -a "${LOG}" > ${redirect} + if [[ -n "${prefix}" ]]; then + awk -v prefix="${prefix}" '{printf "%s: %s\n", prefix, $0}' + else + cat + fi else - __log_args "$@" | tee -a "${LOG}" > ${redirect} - fi -} - -log_setup() { - log "setup" "$@" + if [[ -n "${prefix}" ]]; then + echo "${prefix}: " "$@" + else + echo "$@" + fi + fi | tee -a "${LOG}" > "${redirect}" } log_host() { - local testname=$1 - - shift - log "test:${testname}:host" "$@" + LOG_PREFIX=host log "$@" } log_guest() { - local testname=$1 - - shift - log "test:${testname}:guest" "$@" + LOG_PREFIX=guest log "$@" } test_vm_server_host_client() { - local testname="${FUNCNAME[0]#test_}" + if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then + return "${KSFT_FAIL}" + fi - vm_ssh -- "${VSOCK_TEST}" \ - --mode=server \ - --control-port="${TEST_GUEST_PORT}" \ - --peer-cid=2 \ - 2>&1 | log_guest "${testname}" & + if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then + return "${KSFT_FAIL}" + fi - vm_wait_for_listener "${TEST_GUEST_PORT}" + return "${KSFT_PASS}" +} - ${VSOCK_TEST} \ - --mode=client \ - --control-host=127.0.0.1 \ - --peer-cid="${VSOCK_CID}" \ - --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}" +test_vm_client_host_server() { + if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then + return "${KSFT_FAIL}" + fi - return $? + if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" } -test_vm_client_host_server() { - local testname="${FUNCNAME[0]#test_}" +test_vm_loopback() { + local port=60000 # non-forwarded local port - ${VSOCK_TEST} \ - --mode "server" \ - --control-port "${TEST_HOST_PORT_LISTENER}" \ - --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" & + vm_ssh -- modprobe vsock_loopback &> /dev/null || : - host_wait_for_listener + if ! vm_vsock_test "server" 1 "${port}"; then + return "${KSFT_FAIL}" + fi - vm_ssh -- "${VSOCK_TEST}" \ - --mode=client \ - --control-host=10.0.2.2 \ - --peer-cid=2 \ - --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}" + if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then + return "${KSFT_FAIL}" + fi - return $? + return "${KSFT_PASS}" } -test_vm_loopback() { - local testname="${FUNCNAME[0]#test_}" - local port=60000 # non-forwarded local port +shared_vm_test() { + local tname + + tname="${1}" + + for testname in "${USE_SHARED_VM[@]}"; do + if [[ "${tname}" == "${testname}" ]]; then + return 0 + fi + done - vm_ssh -- "${VSOCK_TEST}" \ - --mode=server \ - --control-port="${port}" \ - --peer-cid=1 2>&1 | log_guest "${testname}" & + return 1 +} - vm_wait_for_listener "${port}" +shared_vm_tests_requested() { + for arg in "$@"; do + if shared_vm_test "${arg}"; then + return 0 + fi + done - vm_ssh -- "${VSOCK_TEST}" \ - --mode=client \ - --control-host="127.0.0.1" \ - --control-port="${port}" \ - --peer-cid=1 2>&1 | log_guest "${testname}" + return 1 +} - return $? +run_shared_vm_tests() { + local arg + + for arg in "$@"; do + if ! 
shared_vm_test "${arg}"; then + continue + fi + + run_shared_vm_test "${arg}" + check_result "$?" "${arg}" + done } -run_test() { +run_shared_vm_test() { local host_oops_cnt_before local host_warn_cnt_before local vm_oops_cnt_before @@ -399,31 +527,32 @@ run_test() { host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l) if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then - echo "FAIL: kernel oops detected on host" | log_host "${name}" + echo "FAIL: kernel oops detected on host" | log_host rc=$KSFT_FAIL fi host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then - echo "FAIL: kernel warning detected on host" | log_host "${name}" + echo "FAIL: kernel warning detected on host" | log_host rc=$KSFT_FAIL fi vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then - echo "FAIL: kernel oops detected on vm" | log_host "${name}" + echo "FAIL: kernel oops detected on vm" | log_host rc=$KSFT_FAIL fi vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then - echo "FAIL: kernel warning detected on vm" | log_host "${name}" + echo "FAIL: kernel warning detected on vm" | log_host rc=$KSFT_FAIL fi return "${rc}" } +BUILD=0 QEMU="qemu-system-$(uname -m)" while getopts :hvsq:b o @@ -452,30 +581,21 @@ handle_build echo "1..${#ARGS[@]}" -log_setup "Booting up VM" -vm_start -vm_wait_for_ssh -log_setup "VM booted up" - cnt_pass=0 cnt_fail=0 cnt_skip=0 cnt_total=0 -for arg in "${ARGS[@]}"; do - run_test "${arg}" - rc=$? - if [[ ${rc} -eq $KSFT_PASS ]]; then - cnt_pass=$(( cnt_pass + 1 )) - echo "ok ${cnt_total} ${arg}" - elif [[ ${rc} -eq $KSFT_SKIP ]]; then - cnt_skip=$(( cnt_skip + 1 )) - echo "ok ${cnt_total} ${arg} # SKIP" - elif [[ ${rc} -eq $KSFT_FAIL ]]; then - cnt_fail=$(( cnt_fail + 1 )) - echo "not ok ${cnt_total} ${arg} # exit=$rc" - fi - cnt_total=$(( cnt_total + 1 )) -done + +if shared_vm_tests_requested "${ARGS[@]}"; then + log_host "Booting up VM" + pidfile="$(create_pidfile)" + vm_start "${pidfile}" + vm_wait_for_ssh + log_host "VM booted up" + + run_shared_vm_tests "${ARGS[@]}" + terminate_pidfiles "${pidfile}" +fi echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" echo "Log: ${LOG}" diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d4517386e551..9e1250790f33 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -2015,6 +2015,11 @@ static void test_stream_transport_change_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + /* Although setting SO_LINGER does not affect the original test + * for null-ptr-deref, it may trigger a lockdep warning. + */ + enable_so_linger(s, 1); + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); /* The connect can fail due to signals coming from the thread, * or because the receiver connection queue is full. @@ -2352,7 +2357,7 @@ static struct test_case test_cases[] = { .run_server = test_stream_nolinger_server, }, { - .name = "SOCK_STREAM transport change null-ptr-deref", + .name = "SOCK_STREAM transport change null-ptr-deref, lockdep warn", .run_client = test_stream_transport_change_client, .run_server = test_stream_transport_change_server, }, |
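
The two tls.c cases added above keep the receiving socket keyless and walk the raw TLS byte stream to check that no record carries more plaintext than TLS_TX_MAX_PAYLOAD_LEN allows. As a standalone illustration of that record walk (an independent sketch, not the selftest helper itself), the function below validates the 5-byte record header (content type 23 for application data, legacy version bytes 0x03 0x03, big-endian length) and subtracts a 24-byte overhead that assumes the TLS 1.2 AES_CCM_128 cipher configured by the test, i.e. an 8-byte explicit nonce plus a 16-byte tag:

#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

/*
 * Return the total plaintext carried by a buffer of TLS 1.2
 * application-data records, or -1 on a malformed or truncated record.
 * The 24-byte overhead is an assumption matching AES_CCM_128 as used by
 * the selftest, not a general constant.
 */
static ssize_t walk_tls12_records(const uint8_t *buf, size_t len,
				  size_t max_plaintext)
{
	const size_t hdr_len = 5, overhead = 8 + 16;
	size_t off = 0, total = 0;

	while (off + hdr_len <= len) {
		uint16_t rec_len;

		/* Content type 23 (application data), legacy version 3.3 */
		if (buf[off] != 23 || buf[off + 1] != 0x03 || buf[off + 2] != 0x03)
			return -1;

		rec_len = (buf[off + 3] << 8) | buf[off + 4];
		if (rec_len < overhead || rec_len - overhead > max_plaintext)
			return -1;

		total += rec_len - overhead;
		off += hdr_len + rec_len;
	}

	return off == len ? (ssize_t)total : -1;
}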

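
The kublk.c rework above moves the affinity call to the very start of the I/O handler thread, before ublk_thread_init() sets up the io_uring and the rest of the per-thread state, so that first-touch NUMA placement puts those allocations on the node the thread is pinned to. The sketch below shows that ordering in isolation; struct worker, worker_fn() and the 1 MiB arena are hypothetical stand-ins, not kublk APIs:

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdlib.h>

struct worker {
	cpu_set_t *affinity;	/* may be NULL if no pinning was requested */
	void *arena;
};

static void *worker_fn(void *arg)
{
	struct worker *w = arg;

	/* Pin first: pages faulted in below land on the local NUMA node. */
	if (w->affinity)
		pthread_setaffinity_np(pthread_self(), sizeof(*w->affinity),
				       w->affinity);

	/* Hypothetical per-thread allocation, e.g. ring and buffer memory. */
	w->arena = calloc(1, 1 << 20);

	/* ... submit and process I/O using w->arena ... */

	free(w->arena);
	return NULL;
}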
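
The reworked wait_for_listener() in vmtest.sh leans on the /proc/net/tcp layout: field 2 is the local address as hexadecimal "IP:PORT" and field 4 is the socket state, where 0A means TCP_LISTEN. A rough C equivalent of that check is sketched below; it scans only the IPv4 table, whereas the script also matches /proc/net/tcp6, and port_has_listener() is just an illustrative name:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Return true if a socket is listening on @port according to /proc/net/tcp. */
static bool port_has_listener(unsigned short port)
{
	char line[256], local[64], state[8], needle[16];
	bool found = false;
	FILE *f;

	f = fopen("/proc/net/tcp", "r");
	if (!f)
		return false;

	/* The local port is printed as four upper-case hex digits after ':'. */
	snprintf(needle, sizeof(needle), ":%04X", (unsigned int)port);

	/* Skip the header, then parse "sl local_address rem_address st ...". */
	if (fgets(line, sizeof(line), f)) {
		while (fscanf(f, "%*s %63s %*s %7s%*[^\n]", local, state) == 2) {
			if (strstr(local, needle) && !strcmp(state, "0A")) {
				found = true;
				break;
			}
		}
	}

	fclose(f);
	return found;
}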