summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-11 19:31:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-11 19:31:52 -0800
commit37a93dd5c49b5fda807fd204edf2547c3493319c (patch)
treece1ef5a642b9ea3d7242156438eb96dc5607a752 /tools/testing/selftests/net
parent098b6e44cbaa2d526d06af90c862d13fb414a0ec (diff)
parent83310d613382f74070fc8b402f3f6c2af8439ead (diff)
Merge tag 'net-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextipvs/mainipvs/HEADipvs-next/mainipvs-next/HEADdavem/net-next/maindavem/net-next/HEAD
Pull networking updates from Paolo Abeni: "Core & protocols: - A significant effort all around the stack to guide the compiler to make the right choice when inlining code, to avoid unneeded calls for small helper and stack canary overhead in the fast-path. This generates better and faster code with very small or no text size increases, as in many cases the call generated more code than the actual inlined helper. - Extend AccECN implementation so that is now functionally complete, also allow the user-space enabling it on a per network namespace basis. - Add support for memory providers with large (above 4K) rx buffer. Paired with hw-gro, larger rx buffer sizes reduce the number of buffers traversing the stack, dincreasing single stream CPU usage by up to ~30%. - Do not add HBH header to Big TCP GSO packets. This simplifies the RX path, the TX path and the NIC drivers, and is possible because user-space taps can now interpret correctly such packets without the HBH hint. - Allow IPv6 routes to be configured with a gateway address that is resolved out of a different interface than the one specified, aligning IPv6 to IPv4 behavior. - Multi-queue aware sch_cake. This makes it possible to scale the rate shaper of sch_cake across multiple CPUs, while still enforcing a single global rate on the interface. - Add support for the nbcon (new buffer console) infrastructure to netconsole, enabling lock-free, priority-based console operations that are safer in crash scenarios. - Improve the TCP ipv6 output path to cache the flow information, saving cpu cycles, reducing cache line misses and stack use. - Improve netfilter packet tracker to resolve clashes for most protocols, avoiding unneeded drops on rare occasions. - Add IP6IP6 tunneling acceleration to the flowtable infrastructure. - Reduce tcp socket size by one cache line. - Notify neighbour changes atomically, avoiding inconsistencies between the notification sequence and the actual states sequence. - Add vsock namespace support, allowing complete isolation of vsocks across different network namespaces. - Improve xsk generic performances with cache-alignment-oriented optimizations. - Support netconsole automatic target recovery, allowing netconsole to reestablish targets when underlying low-level interface comes back online. Driver API: - Support for switching the working mode (automatic vs manual) of a DPLL device via netlink. - Introduce PHY ports representation to expose multiple front-facing media ports over a single MAC. - Introduce "rx-polarity" and "tx-polarity" device tree properties, to generalize polarity inversion requirements for differential signaling. - Add helper to create, prepare and enable managed clocks. Device drivers: - Add Huawei hinic3 PF etherner driver. - Add DWMAC glue driver for Motorcomm YT6801 PCIe ethernet controller. - Add ethernet driver for MaxLinear MxL862xx switches - Remove parallel-port Ethernet driver. - Convert existing driver timestamp configuration reporting to hwtstamp_get and remove legacy ioctl(). - Convert existing drivers to .get_rx_ring_count(), simplifing the RX ring count retrieval. Also remove the legacy fallback path. - Ethernet high-speed NICs: - Broadcom (bnxt, bng): - bnxt: add FW interface update to support FEC stats histogram and NVRAM defragmentation - bng: add TSO and H/W GRO support - nVidia/Mellanox (mlx5): - improve latency of channel restart operations, reducing the used H/W resources - add TSO support for UDP over GRE over VLAN - add flow counters support for hardware steering (HWS) rules - use a static memory area to store headers for H/W GRO, leading to 12% RX tput improvement - Intel (100G, ice, idpf): - ice: reorganizes layout of Tx and Rx rings for cacheline locality and utilizes __cacheline_group* macros on the new layouts - ice: introduces Synchronous Ethernet (SyncE) support - Meta (fbnic): - adds debugfs for firmware mailbox and tx/rx rings vectors - Ethernet virtual: - geneve: introduce GRO/GSO support for double UDP encapsulation - Ethernet NICs consumer, and embedded: - Synopsys (stmmac): - some code refactoring and cleanups - RealTek (r8169): - add support for RTL8127ATF (10G Fiber SFP) - add dash and LTR support - Airoha: - AN8811HB 2.5 Gbps phy support - Freescale (fec): - add XDP zero-copy support - Thunderbolt: - add get link setting support to allow bonding - Renesas: - add support for RZ/G3L GBETH SoC - Ethernet switches: - Maxlinear: - support R(G)MII slow rate configuration - add support for Intel GSW150 - Motorcomm (yt921x): - add DCB/QoS support - TI: - icssm-prueth: support bridging (STP/RSTP) via the switchdev framework - Ethernet PHYs: - Realtek: - enable SGMII and 2500Base-X in-band auto-negotiation - simplify and reunify C22/C45 drivers - Micrel: convert bindings to DT schema - CAN: - move skb headroom content into skb extensions, making CAN metadata access more robust - CAN drivers: - rcar_canfd: - add support for FD-only mode - add support for the RZ/T2H SoC - sja1000: cleanup the CAN state handling - WiFi: - implement EPPKE/802.1X over auth frames support - split up drop reasons better, removing generic RX_DROP - additional FTM capabilities: 6 GHz support, supported number of spatial streams and supported number of LTF repetitions - better mac80211 iterators to enumerate resources - initial UHR (Wi-Fi 8) support for cfg80211/mac80211 - WiFi drivers: - Qualcomm/Atheros: - ath11k: support for Channel Frequency Response measurement - ath12k: a significant driver refactor to support multi-wiphy devices and and pave the way for future device support in the same driver (rather than splitting to ath13k) - ath12k: support for the QCC2072 chipset - Intel: - iwlwifi: partial Neighbor Awareness Networking (NAN) support - iwlwifi: initial support for U-NII-9 and IEEE 802.11bn - RealTek (rtw89): - preparations for RTL8922DE support - Bluetooth: - implement setsockopt(BT_PHY) to set the connection packet type/PHY - set link_policy on incoming ACL connections - Bluetooth drivers: - btusb: add support for MediaTek7920, Realtek RTL8761BU and 8851BE - btqca: add WCN6855 firmware priority selection feature" * tag 'net-next-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1254 commits) bnge/bng_re: Add a new HSI net: macb: Fix tx/rx malfunction after phy link down and up af_unix: Fix memleak of newsk in unix_stream_connect(). net: ti: icssg-prueth: Add optional dependency on HSR net: dsa: add basic initial driver for MxL862xx switches net: mdio: add unlocked mdiodev C45 bus accessors net: dsa: add tag format for MxL862xx switches dt-bindings: net: dsa: add MaxLinear MxL862xx selftests: drivers: net: hw: Modify toeplitz.c to poll for packets octeontx2-pf: Unregister devlink on probe failure net: renesas: rswitch: fix forwarding offload statemachine ionic: Rate limit unknown xcvr type messages tcp: inet6_csk_xmit() optimization tcp: populate inet->cork.fl.u.ip6 in tcp_v6_syn_recv_sock() tcp: populate inet->cork.fl.u.ip6 in tcp_v6_connect() ipv6: inet6_csk_xmit() and inet6_csk_update_pmtu() use inet->cork.fl.u.ip6 ipv6: use inet->cork.fl.u.ip6 and np->final in ip6_datagram_dst_update() ipv6: use np->final in inet6_sk_rebuild_header() ipv6: add daddr/final storage in struct ipv6_pinfo net: stmmac: qcom-ethqos: fix qcom_ethqos_serdes_powerup() ...
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile18
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/double_udp_encap.sh393
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh28
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh18
-rw-r--r--tools/testing/selftests/net/hsr/Makefile2
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh207
-rwxr-xr-xtools/testing/selftests/net/hsr/link_faults.sh378
-rwxr-xr-xtools/testing/selftests/net/hsr/prp_ping.sh147
-rw-r--r--tools/testing/selftests/net/hsr/settings2
-rw-r--r--tools/testing/selftests/net/icmp_rfc4884.c679
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh2
-rw-r--r--tools/testing/selftests/net/ipsec.c11
-rw-r--r--tools/testing/selftests/net/lib/csum.c2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py44
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py32
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile1
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c98
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_splice.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c27
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh107
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh69
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh142
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt70
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt14
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt15
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt70
-rw-r--r--tools/testing/selftests/net/tfo.c13
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh13
-rw-r--r--tools/testing/selftests/net/tls.c16
-rw-r--r--tools/testing/selftests/net/tun.c898
-rw-r--r--tools/testing/selftests/net/tuntap_helpers.h390
-rw-r--r--tools/testing/selftests/net/txtimestamp.c10
92 files changed, 4909 insertions, 338 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6930fe926c58..97ad4d551d44 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -7,6 +7,7 @@ cmsg_sender
epoll_busy_poll
fin_ack_lat
hwtstamp_config
+icmp_rfc4884
io_uring_zerocopy_tx
ioam6_parser
ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 45c4ea381bc3..afdea6d95bde 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -22,6 +22,7 @@ TEST_PROGS := \
cmsg_so_mark.sh \
cmsg_so_priority.sh \
cmsg_time.sh \
+ double_udp_encap.sh \
drop_monitor_tests.sh \
fcnal-ipv4.sh \
fcnal-ipv6.sh \
@@ -167,6 +168,7 @@ TEST_GEN_PROGS := \
bind_timewait \
bind_wildcard \
epoll_busy_poll \
+ icmp_rfc4884 \
ipv6_fragmentation \
proc_net_pktgen \
reuseaddr_conflict \
@@ -181,7 +183,6 @@ TEST_GEN_PROGS := \
tap \
tcp_port_share \
tls \
- tun \
# end of TEST_GEN_PROGS
TEST_FILES := \
@@ -193,7 +194,11 @@ TEST_FILES := \
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
-YNL_GEN_PROGS := netlink-dumps
+YNL_GEN_PROGS := \
+ netlink-dumps \
+ tun \
+# end of YNL_GEN_PROGS
+
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_GEN_PROGS += $(YNL_GEN_PROGS)
@@ -204,7 +209,14 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
# YNL build
-YNL_GENS := netdev
+YNL_GENS := \
+ netdev \
+ rt-addr \
+ rt-link \
+ rt-neigh \
+ rt-route \
+# end of YNL_GENS
+
include ynl.mk
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index b84362b9b508..cd49b7dfe216 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -77,6 +77,7 @@ CONFIG_NET_DROP_MONITOR=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_NETFILTER_XT_NAT=m
diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh
new file mode 100755
index 000000000000..9aaf97cdf141
--- /dev/null
+++ b/tools/testing/selftests/net/double_udp_encap.sh
@@ -0,0 +1,393 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# shellcheck disable=SC2155 # prefer RO variable over return value from cmd
+readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py"
+
+readonly SRC=1
+readonly DST=2
+
+readonly NET_V4=192.168.1.
+readonly NET_V6=2001:db8::
+readonly OL1_NET_V4=172.16.1.
+readonly OL1_NET_V6=2001:db8:1::
+readonly OL2_NET_V4=172.16.2.
+readonly OL2_NET_V6=2001:db8:2::
+
+trap cleanup_all_ns EXIT
+
+# shellcheck disable=SC2329 # can't figure out usage trough a variable
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+# shellcheck disable=SC2329 # can't figure out usage trough a variable
+create_gnv_endpoint() {
+ local -r netns=$1
+ local -r bm_rem_addr=$2
+ local -r gnv_dev=$3
+ local -r gnv_id=$4
+ local opts=$5
+ local gnv_json
+ local rem
+
+ if is_ipv6 "$bm_rem_addr"; then
+ rem=remote6
+ else
+ rem=remote
+ fi
+
+ # add ynl opt separator, if needed
+ [ -n "$opts" ] && opts=", $opts"
+
+ gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }"
+ ip netns exec "$netns" "$CLI" --family rt-link --create --excl \
+ --do newlink --json "{\"ifname\": \"$gnv_dev\",
+ \"linkinfo\": {\"kind\":\"geneve\",
+ \"data\": $gnv_json } }" > /dev/null
+ ip -n "$netns" link set dev "$gnv_dev" up
+}
+
+# shellcheck disable=SC2329 # can't figure out usage trough a variable
+create_vxlan_endpoint() {
+ local -r netns=$1
+ local -r bm_rem_addr=$2
+ local -r vxlan_dev=$3
+ local -r vxlan_id=$4
+ local -r opts_str=$5
+ local oldifs
+ local -a opts
+ local opt
+
+ # convert the arguments from yaml format
+ oldifs=$IFS
+ IFS=','
+ for opt in $opts_str; do
+ local pattern='"port":'
+
+ [ -n "$opt" ] || continue
+
+ opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}")
+ done
+ IFS=$oldifs
+ [ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789")
+
+ ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \
+ remote "$bm_rem_addr" "${opts[@]}"
+ ip -n "$netns" link set dev "$vxlan_dev" up
+}
+
+create_ns() {
+ local nested_opt='"port":6082'
+ local create_endpoint
+ local options="$1"
+ local feature
+ local dev
+ local id
+ local ns
+
+ RET=0
+
+ # +-------------+ +-------------+
+ # | NS_SRC | | NS_NST_DST |
+ # | | | |
+ # | gnv_nst1 | | gnv_nst2 |
+ # | + | | + |
+ # | | | | | |
+ # | + | | + |
+ # | gnv1 | | gnv2 |
+ # | + | | + |
+ # | | | | | |
+ # | + veth1 +--------+ veth2 + |
+ # | | | |
+ # +-------------+ +-------------+
+
+ setup_ns NS_SRC NS_DST
+
+ # concatenate caller provided options and default one
+ [ -n "$2" ] && nested_opt="$nested_opt,$2"
+
+ ip link add name "veth$SRC" netns "$NS_SRC" type veth \
+ peer name "veth$DST" netns "$NS_DST"
+ case "$ENCAP" in
+ vxlan)
+ create_endpoint=create_vxlan_endpoint
+ dev=vx
+ ;;
+ geneve)
+ create_endpoint=create_gnv_endpoint
+ dev=gnv
+ ;;
+ esac
+
+ id=1
+ for ns in "${NS_LIST[@]}"; do
+ ip -n "$ns" link set dev "veth$id" up
+
+ # ensure the sender can do large write just after 3whs
+ ip netns exec "$ns" \
+ sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304"
+
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ if [ $FAMILY = "4" ]; then
+ ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24"
+ $create_endpoint "$ns" "$NET_V4$((3 - id))" \
+ "$dev$id" 4 "$options"
+ ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24"
+
+ # nested tunnel devices
+ # pmtu can't be propagated to upper layer devices;
+ # need manual adjust
+ $create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \
+ "$dev"_nst"$id" 40 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"_nst"$id" \
+ "$OL2_NET_V4$id/24"
+ ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392
+ else
+ ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \
+ nodad
+ $create_endpoint "$ns" "$NET_V6$((3 - id))" \
+ "$dev"6"$id" 6 "$options"
+ ip -n "$ns" addr add dev "$dev"6"$id" \
+ "$OL1_NET_V6$id/64" nodad
+
+ $create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \
+ "$dev"6_nst"$id" 60 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"6_nst"$id" \
+ "$OL2_NET_V6$id/64" nodad
+ ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352
+ fi
+ id=$((id+1))
+ done
+
+ # enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is
+ # actually segmented
+ for feature in tso tx-udp_tnl-segmentation; do
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \
+ "$feature" off 2>/dev/null
+ done
+}
+
+create_ns_gso() {
+ local dev
+
+ create_ns "$@"
+ if [ "$ENCAP" = "geneve" ]; then
+ dev=gnv
+ else
+ dev=vx
+ fi
+ [ "$FAMILY" = "6" ] && dev="$dev"6
+ ip netns exec "$NS_SRC" ethtool -K "$dev$SRC" \
+ tx-gso-partial on \
+ tx-udp_tnl-segmentation on \
+ tx-udp_tnl-csum-segmentation on
+}
+
+create_ns_gso_gro() {
+ create_ns_gso "$@"
+ ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1
+}
+
+run_test() {
+ local -r dst=$NET$DST
+ local -r msg=$1
+ local -r total_size=$2
+ local -r encappkts=$3
+ local inner_proto_offset=0
+ local inner_maclen=14
+ local rx_family="-4"
+ local ipt=iptables
+ local bpf_filter
+ local -a rx_args
+ local wire_pkts
+ local rcvpkts
+ local encl=8
+ local dport
+ local pkts
+ local snd
+
+ if [ $FAMILY = "6" ]; then
+ ipt=ip6tables
+ else
+ # rx program does not support '-6' and implies ipv6 usage by
+ # default
+ rx_args=("$rx_family")
+ fi
+
+ # The received can only check fixed size packet
+ pkts=$((total_size / GSO_SIZE))
+ if [ -n "$4" ]; then
+ wire_pkts=$4
+ elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then
+ wire_pkts=1
+ rx_args+=("-l" "$GSO_SIZE")
+ else
+ wire_pkts=2
+ pkts=$((pkts + 1))
+ fi
+
+ if [ "$ENCAP" = "geneve" ]; then
+ dport=6081
+ else
+ dport=4789
+ fi
+
+ # Either:
+ # - IPv4, nested tunnel carries UDP over IPv4, with dport 6082,
+ # innermost is TCP over IPv4 on port 8000
+ # - IPv6, nested tunnel carries UDP over IPv6, with dport 6082,
+ # innermost is TCP over IPv6 on port 8000
+ # The nested tunnel port is 6082 and the nested encap len is 8
+ # regardless of the encap type (no geneve opts).
+ # In inherit protocol mode there is no nested mac hdr and the nested
+ # l3 protocol type field belongs to the geneve hdr.
+ [ "$USE_HINT" = true ] && encl=16
+ [ "$INHERIT" = true ] && inner_maclen=0
+ [ "$INHERIT" = true ] && inner_proto_offset=-4
+ local inner=$((inner_maclen+encl))
+ local proto=$((inner_maclen+encl+inner_proto_offset))
+ bpf_filter=$(nfbpf_compile "(ip &&
+ ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 &&
+ ip[$((51+encl))] == 0x11 &&
+ ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 &&
+ ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 &&
+ ip[$((87+inner))] == 0x6 &&
+ ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) ||
+ (ip6 &&
+ ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd &&
+ ip6[$((68+encl))] == 0x11 &&
+ ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 &&
+ ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd &&
+ ip6[$((124+inner))] == 0x6 &&
+ ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)")
+
+ # ignore shorts packet, to avoid arp/mld induced noise
+ ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \
+ -n "$pkts" "${rx_args[@]}" &
+ local pid=$!
+ wait_local_port_listen "$NS_DST" 8000 tcp
+ ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \
+ -s "$total_size" -D "$dst"
+ local ret=$?
+ check_err "$ret" "client failure exit code $ret"
+ wait "$pid"
+ ret=$?
+ check_err "$ret" "sever failure exit code $ret"
+
+ snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c |
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$snd" = "$wire_pkts" ]
+ # shellcheck disable=SC2319 # known false positive
+ check_err $? "send $snd packets on the lowest link, expected $wire_pkts"
+
+ rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$rcvpkts" = "$encappkts" ]
+ check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts"
+ log_test "$msg"
+}
+
+run_tests() {
+ for FAMILY in 4 6; do
+ NET=$OL2_NET_V4
+ GSO_SIZE=1340 # 1392 - 20 - 32
+
+ if [ $FAMILY = 6 ]; then
+ NET=$OL2_NET_V6
+ GSO_SIZE=1280 # 1352 - 40 - 32
+ fi
+
+ echo "IPv$FAMILY"
+
+ unset USE_HINT
+ unset INHERIT
+
+ # "geneve" must be last encap in list, so that later
+ # test cases will run on it
+ for ENCAP in "vxlan" "geneve"; do
+ create_ns
+ run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+
+ create_ns_gso
+ run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \
+ 4 1
+ cleanup_all_ns
+
+ # IPv4 only test
+ [ $FAMILY = "4" ] || continue
+ create_ns_gso
+ ip netns exec "$NS_SRC" \
+ sysctl -qw net.ipv4.ip_no_pmtu_disc=1
+ run_test "GSO disable due to no fixedid - $ENCAP" \
+ $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+ done
+
+ # GRO tests imply/require geneve encap, the only one providing
+ # GRO hints
+ create_ns_gso_gro
+ run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ # hint option is expected for all the following tests in the RX
+ # path
+ USE_HINT=true
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1'
+ run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\
+ 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1' \
+ '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO - no nested csum" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-csum":1'
+ run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\
+ $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ INHERIT=true
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \
+ '"udp-csum":1,"inner-proto-inherit":1'
+ run_test "double tunnel GRO - nested inherit proto" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+ unset INHERIT
+
+ create_ns_gso_gro '"gro-hint":1'
+ run_test "double tunnel GRO - short last pkt" \
+ $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2
+ cleanup_all_ns
+ done
+}
+
+require_command nfbpf_compile
+require_command jq
+
+# tcp retransmisions will break the accounting
+xfail_on_slow run_tests
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index c01be076b210..e0d45292a298 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6
TEST_NET4IN6[1]=10.1.1.254
TEST_NET4IN6[2]=10.2.1.254
-# mcast address
+# mcast addresses
+MCAST4=233.252.0.1
MCAST6=ff02::1
VRF=lisa
@@ -260,11 +261,15 @@ valid_onlink_ipv4()
run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -300,17 +305,15 @@ invalid_onlink_ipv4()
{
run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
"Invalid gw - local unicast address"
+ run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
"Invalid gw - local unicast address, VRF"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
-
- run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
- "Gateway resolves to wrong nexthop device"
-
- run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
################################################################################
@@ -357,12 +360,16 @@ valid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+ run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \
+ ${NETIFS[p7]} 0 "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -428,13 +435,6 @@ invalid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
"No nexthop device given"
-
- # default VRF validation is done against LOCAL table
- # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
- # "Gateway resolves to wrong nexthop device"
-
- run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
run_onlink_tests()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 892895659c7e..1f2bf6e81847 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -306,39 +306,39 @@ run_test()
if [ $skip_ptp = false ]; then
check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
fi
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 4b6afc0fe9f8..31fb9326cf53 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -5,6 +5,8 @@ top_srcdir = ../../../../..
TEST_PROGS := \
hsr_ping.sh \
hsr_redbox.sh \
+ link_faults.sh \
+ prp_ping.sh \
# end of TEST_PROGS
TEST_FILES += hsr_common.sh
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 5a65f4f836be..f4d685df4345 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -27,31 +27,34 @@ while getopts "$optstring" option;do
esac
done
-do_complete_ping_test()
+do_ping_tests()
{
- echo "INFO: Initial validation ping."
- # Each node has to be able each one.
- do_ping "$ns1" 100.64.0.2
- do_ping "$ns2" 100.64.0.1
- do_ping "$ns3" 100.64.0.1
- stop_if_error "Initial validation failed."
-
- do_ping "$ns1" 100.64.0.3
- do_ping "$ns2" 100.64.0.3
- do_ping "$ns3" 100.64.0.2
+ local netid="$1"
- do_ping "$ns1" dead:beef:1::2
- do_ping "$ns1" dead:beef:1::3
- do_ping "$ns2" dead:beef:1::1
- do_ping "$ns2" dead:beef:1::2
- do_ping "$ns3" dead:beef:1::1
- do_ping "$ns3" dead:beef:1::2
+ echo "INFO: Running ping tests."
- stop_if_error "Initial validation failed."
+ echo "INFO: Initial validation ping."
+ # Each node has to be able to reach each one.
+ do_ping "$ns1" "100.64.$netid.2"
+ do_ping "$ns1" "100.64.$netid.3"
+ do_ping "$ns2" "100.64.$netid.1"
+ do_ping "$ns2" "100.64.$netid.3"
+ do_ping "$ns3" "100.64.$netid.1"
+ do_ping "$ns3" "100.64.$netid.2"
+ stop_if_error "Initial validation failed on IPv4."
+
+ do_ping "$ns1" "dead:beef:$netid::2"
+ do_ping "$ns1" "dead:beef:$netid::3"
+ do_ping "$ns2" "dead:beef:$netid::1"
+ do_ping "$ns2" "dead:beef:$netid::2"
+ do_ping "$ns3" "dead:beef:$netid::1"
+ do_ping "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Initial validation failed on IPv6."
# Wait until supervisor all supervision frames have been processed and the node
# entries have been merged. Otherwise duplicate frames will be observed which is
# valid at this stage.
+ echo "INFO: Wait for node table entries to be merged."
WAIT=5
while [ ${WAIT} -gt 0 ]
do
@@ -68,62 +71,30 @@ do_complete_ping_test()
sleep 1
echo "INFO: Longer ping test."
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" dead:beef:1::2
- do_ping_long "$ns1" 100.64.0.3
- do_ping_long "$ns1" dead:beef:1::3
-
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" dead:beef:1::1
- do_ping_long "$ns2" 100.64.0.3
- do_ping_long "$ns2" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" dead:beef:1::1
- do_ping_long "$ns3" 100.64.0.2
- do_ping_long "$ns3" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- echo "INFO: Cutting one link."
- do_ping_long "$ns1" 100.64.0.3 &
-
- sleep 3
- ip -net "$ns3" link set ns3eth1 down
- wait
-
- ip -net "$ns3" link set ns3eth1 up
-
- stop_if_error "Failed with one link down."
-
- echo "INFO: Delay the link and drop a few packages."
- tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
- tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
-
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" 100.64.0.2
- stop_if_error "Failed with delay and packetloss."
-
- echo "INFO: All good."
+ do_ping_long "$ns1" "100.64.$netid.2"
+ do_ping_long "$ns1" "dead:beef:$netid::2"
+ do_ping_long "$ns1" "100.64.$netid.3"
+ do_ping_long "$ns1" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns1)."
+
+ do_ping_long "$ns2" "100.64.$netid.1"
+ do_ping_long "$ns2" "dead:beef:$netid::1"
+ do_ping_long "$ns2" "100.64.$netid.3"
+ do_ping_long "$ns2" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns2)."
+
+ do_ping_long "$ns3" "100.64.$netid.1"
+ do_ping_long "$ns3" "dead:beef:$netid::1"
+ do_ping_long "$ns3" "100.64.$netid.2"
+ do_ping_long "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Longer ping test failed (ns3)."
}
setup_hsr_interfaces()
{
local HSRv="$1"
- echo "INFO: preparing interfaces for HSRv${HSRv}."
+ echo "INFO: Preparing interfaces for HSRv${HSRv}."
# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
#
# ns1eth1 ----- ns2eth1
@@ -140,17 +111,20 @@ setup_hsr_interfaces()
ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
# HSRv0/1
- ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+ ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \
+ slave2 ns1eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \
+ slave2 ns2eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \
+ slave2 ns3eth2 supervision 45 version "$HSRv" proto 0
# IP for HSR
ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
- ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+ ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad
ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
- ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+ ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad
ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
- ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+ ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad
ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
@@ -177,113 +151,56 @@ setup_hsr_interfaces()
setup_vlan_interfaces() {
ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2
- ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3
- ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4
- ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5
-
ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2
- ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3
- ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4
- ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5
-
ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2
- ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3
- ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4
- ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5
ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2
- ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3
- ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4
- ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad
ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2
- ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3
- ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4
- ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad
ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2
- ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3
- ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4
- ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5
+ ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad
ip -net "$ns1" link set dev hsr1.2 up
- ip -net "$ns1" link set dev hsr1.3 up
- ip -net "$ns1" link set dev hsr1.4 up
- ip -net "$ns1" link set dev hsr1.5 up
-
ip -net "$ns2" link set dev hsr2.2 up
- ip -net "$ns2" link set dev hsr2.3 up
- ip -net "$ns2" link set dev hsr2.4 up
- ip -net "$ns2" link set dev hsr2.5 up
-
ip -net "$ns3" link set dev hsr3.2 up
- ip -net "$ns3" link set dev hsr3.3 up
- ip -net "$ns3" link set dev hsr3.4 up
- ip -net "$ns3" link set dev hsr3.5 up
}
-hsr_vlan_ping() {
- do_ping "$ns1" 100.64.2.2
- do_ping "$ns1" 100.64.3.2
- do_ping "$ns1" 100.64.4.2
- do_ping "$ns1" 100.64.5.2
-
- do_ping "$ns1" 100.64.2.3
- do_ping "$ns1" 100.64.3.3
- do_ping "$ns1" 100.64.4.3
- do_ping "$ns1" 100.64.5.3
-
- do_ping "$ns2" 100.64.2.1
- do_ping "$ns2" 100.64.3.1
- do_ping "$ns2" 100.64.4.1
- do_ping "$ns2" 100.64.5.1
-
- do_ping "$ns2" 100.64.2.3
- do_ping "$ns2" 100.64.3.3
- do_ping "$ns2" 100.64.4.3
- do_ping "$ns2" 100.64.5.3
-
- do_ping "$ns3" 100.64.2.1
- do_ping "$ns3" 100.64.3.1
- do_ping "$ns3" 100.64.4.1
- do_ping "$ns3" 100.64.5.1
-
- do_ping "$ns3" 100.64.2.2
- do_ping "$ns3" 100.64.3.2
- do_ping "$ns3" 100.64.4.2
- do_ping "$ns3" 100.64.5.2
+run_ping_tests()
+{
+ echo "INFO: Running ping tests."
+ do_ping_tests 0
}
-run_vlan_tests() {
+run_vlan_tests()
+{
vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}')
if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then
- echo "INFO: Running VLAN tests"
+ echo "INFO: Running VLAN ping tests"
setup_vlan_interfaces
- hsr_vlan_ping
+ do_ping_tests 2
else
echo "INFO: Not Running VLAN tests as the device does not support VLAN"
fi
}
check_prerequisites
-setup_ns ns1 ns2 ns3
-
trap cleanup_all_ns EXIT
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 0
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
setup_ns ns1 ns2 ns3
-
setup_hsr_interfaces 1
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
exit $ret
diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh
new file mode 100755
index 000000000000..be526281571c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/link_faults.sh
@@ -0,0 +1,378 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2329
+
+source ../lib.sh
+
+ALL_TESTS="
+ test_clean_hsrv0
+ test_cut_link_hsrv0
+ test_packet_loss_hsrv0
+ test_high_packet_loss_hsrv0
+ test_reordering_hsrv0
+
+ test_clean_hsrv1
+ test_cut_link_hsrv1
+ test_packet_loss_hsrv1
+ test_high_packet_loss_hsrv1
+ test_reordering_hsrv1
+
+ test_clean_prp
+ test_cut_link_prp
+ test_packet_loss_prp
+ test_high_packet_loss_prp
+ test_reordering_prp
+"
+
+# The tests are running ping for 5sec with a relatively short interval in
+# different scenarios with faulty links (cut links, packet loss, delay,
+# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms)
+# is short enough that the base delay (50ms) leads to a queue in the netem
+# qdiscs which is needed for reordering.
+
+setup_hsr_topo()
+{
+ # Three HSR nodes in a ring, every node has a LAN A interface connected
+ # to the LAN B interface of the next node.
+ #
+ # node1 node2
+ #
+ # vethA -------- vethB
+ # hsr1 hsr2
+ # vethB vethA
+ # \ /
+ # vethA vethB
+ # hsr3
+ #
+ # node3
+
+ local ver="$1"
+
+ setup_ns node1 node2 node3
+
+ # veth links
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node1" type veth peer name vethB netns "$node2"
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node2" type veth peer name vethB netns "$node3"
+ ip link add vethA netns "$node3" type veth peer name vethB netns "$node1"
+
+ # MAC addresses (not needed for HSR operation, but helps with debugging)
+ ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA
+ ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB
+
+ ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB
+
+ ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA
+ ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB
+
+ # HSR interfaces
+ ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev hsr1
+ ip -net "$node2" addr add 100.64.0.2/24 dev hsr2
+ ip -net "$node3" addr add 100.64.0.3/24 dev hsr3
+
+ # Set all links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set hsr1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set hsr2 up
+
+ ip -net "$node3" link set vethA up
+ ip -net "$node3" link set vethB up
+ ip -net "$node3" link set hsr3 up
+}
+
+setup_prp_topo()
+{
+ # Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+ #
+ # vethA ----- vethA
+ # prp1 prp2
+ # vethB ----- vethB
+ #
+ # node1 node2
+
+ setup_ns node1 node2
+
+ # veth links
+ ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+ ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+ # MAC addresses will be copied from LAN A interface
+ ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+ # PRP interfaces
+ ip -net "$node1" link add name prp1 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+ ip -net "$node2" link add name prp2 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+ ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+
+ # All links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set prp1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set prp2 up
+}
+
+wait_for_hsr_node_table()
+{
+ log_info "Wait for node table entries to be merged."
+ WAIT=5
+ while [ "${WAIT}" -gt 0 ]; do
+ nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table)
+
+ # We need entries in the node tables, and they need to be merged
+ if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \
+ ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then
+ return
+ fi
+
+ sleep 1
+ ((WAIT--))
+ done
+ check_err 1 "Failed to wait for merged node table entries"
+}
+
+setup_topo()
+{
+ local proto="$1"
+
+ if [ "$proto" = "HSRv0" ]; then
+ setup_hsr_topo 0
+ wait_for_hsr_node_table
+ elif [ "$proto" = "HSRv1" ]; then
+ setup_hsr_topo 1
+ wait_for_hsr_node_table
+ elif [ "$proto" = "PRP" ]; then
+ setup_prp_topo
+ else
+ check_err 1 "Unknown protocol (${proto})"
+ fi
+}
+
+check_ping()
+{
+ local node="$1"
+ local dst="$2"
+ local accepted_dups="$3"
+ local ping_args="-q -i 0.01 -c 400"
+
+ log_info "Running ping $node -> $dst"
+ # shellcheck disable=SC2086
+ output=$(ip netns exec "$node" ping $ping_args "$dst" | \
+ grep "packets transmitted")
+ log_info "$output"
+
+ dups=0
+ loss=0
+
+ if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then
+ dups="${BASH_REMATCH[1]}"
+ fi
+ if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then
+ loss="${BASH_REMATCH[1]}"
+ fi
+
+ if [ "$dups" -gt "$accepted_dups" ]; then
+ check_err 1 "Unexpected duplicate packets (${dups})"
+ fi
+ if [ "$loss" != "0%" ]; then
+ check_err 1 "Unexpected packet loss (${loss})"
+ fi
+}
+
+test_clean()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ check_ping "$node1" "100.64.0.2" 0
+
+ log_test "${tname}"
+}
+
+test_clean_hsrv0()
+{
+ test_clean "HSRv0"
+}
+
+test_clean_hsrv1()
+{
+ test_clean "HSRv1"
+}
+
+test_clean_prp()
+{
+ test_clean "PRP"
+}
+
+test_cut_link()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Cutting link from subshell, so check_ping can run in the normal shell
+ # with access to global variables from the test harness.
+ (
+ sleep 2
+ log_info "Cutting link"
+ ip -net "$node1" link set vethB down
+ ) &
+ check_ping "$node1" "100.64.0.2" 0
+
+ wait
+ log_test "${tname}"
+}
+
+
+test_cut_link_hsrv0()
+{
+ test_cut_link "HSRv0"
+}
+
+test_cut_link_hsrv1()
+{
+ test_cut_link "HSRv1"
+}
+
+test_cut_link_prp()
+{
+ test_cut_link "PRP"
+}
+
+test_packet_loss()
+{
+ local proto="$1"
+ local loss="$2"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}, ${loss}"
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Packet loss with lower delay makes sure the packets on the lossy link
+ # arrive first.
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss"
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+test_packet_loss_hsrv0()
+{
+ test_packet_loss "HSRv0" "20%"
+}
+
+test_packet_loss_hsrv1()
+{
+ test_packet_loss "HSRv1" "20%"
+}
+
+test_packet_loss_prp()
+{
+ test_packet_loss "PRP" "20%"
+}
+
+test_high_packet_loss_hsrv0()
+{
+ test_packet_loss "HSRv0" "80%"
+}
+
+test_high_packet_loss_hsrv1()
+{
+ test_packet_loss "HSRv1" "80%"
+}
+
+test_high_packet_loss_prp()
+{
+ test_packet_loss "PRP" "80%"
+}
+
+test_reordering()
+{
+ local proto="$1"
+
+ RET=0
+ tname="${FUNCNAME[0]} - ${proto}"
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20%
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+test_reordering_hsrv0()
+{
+ test_reordering "HSRv0"
+}
+
+test_reordering_hsrv1()
+{
+ test_reordering "HSRv1"
+}
+
+test_reordering_prp()
+{
+ test_reordering "PRP"
+}
+
+cleanup()
+{
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh
new file mode 100755
index 000000000000..fd2ba9f05d4c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/prp_ping.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=true
+
+source ./hsr_common.sh
+
+optstring="h4"
+usage() {
+ echo "Usage: $0 [OPTION]"
+ echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+}
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit 0
+ ;;
+ "4")
+ ipv6=false
+ ;;
+ "?")
+ usage "$0"
+ exit 1
+ ;;
+esac
+done
+
+setup_prp_interfaces()
+{
+ echo "INFO: Preparing interfaces for PRP"
+# Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+#
+# vethA ----- vethA
+# prp1 prp2
+# vethB ----- vethB
+#
+# node1 node2
+
+ # Interfaces
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+ ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+ # MAC addresses will be copied from LAN A interface
+ ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+ # PRP
+ ip -net "$node1" link add name prp1 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+ ip -net "$node2" link add name prp2 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+ ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad
+ ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+ ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad
+
+ # All links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set prp1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set prp2 up
+}
+
+setup_vlan_interfaces()
+{
+ # Interfaces
+ ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2
+ ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2
+ ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad
+
+ ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2
+ ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad
+
+ # All links up
+ ip -net "$node1" link set prp1.2 up
+ ip -net "$node2" link set prp2.2 up
+}
+
+do_ping_tests()
+{
+ local netid="$1"
+
+ echo "INFO: Initial validation ping"
+
+ do_ping "$node1" "100.64.$netid.2"
+ do_ping "$node2" "100.64.$netid.1"
+ stop_if_error "Initial validation failed on IPv4"
+
+ do_ping "$node1" "dead:beef:$netid::2"
+ do_ping "$node2" "dead:beef:$netid::1"
+ stop_if_error "Initial validation failed on IPv6"
+
+ echo "INFO: Longer ping test."
+
+ do_ping_long "$node1" "100.64.$netid.2"
+ do_ping_long "$node2" "100.64.$netid.1"
+ stop_if_error "Longer ping test failed on IPv4."
+
+ do_ping_long "$node1" "dead:beef:$netid::2"
+ do_ping_long "$node2" "dead:beef:$netid::1"
+ stop_if_error "Longer ping test failed on IPv6."
+}
+
+run_ping_tests()
+{
+ echo "INFO: Running ping tests"
+ do_ping_tests 0
+}
+
+run_vlan_ping_tests()
+{
+ vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \
+ grep "vlan-challenged" | awk '{print $2}')
+ vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \
+ grep "vlan-challenged" | awk '{print $2}')
+
+ if [[ "$vlan_challenged_prp1" = "off" || \
+ "$vlan_challenged_prp2" = "off" ]]; then
+ echo "INFO: Running VLAN ping tests"
+ setup_vlan_interfaces
+ do_ping_tests 2
+ else
+ echo "INFO: Not Running VLAN tests as the device does not support VLAN"
+ fi
+}
+
+check_prerequisites
+trap cleanup_all_ns EXIT
+
+setup_ns node1 node2
+setup_prp_interfaces
+
+run_ping_tests
+run_vlan_ping_tests
+
+exit $ret
diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings
index 0fbc037f2aa8..a953c96aa16e 100644
--- a/tools/testing/selftests/net/hsr/settings
+++ b/tools/testing/selftests/net/hsr/settings
@@ -1 +1 @@
-timeout=50
+timeout=180
diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c
new file mode 100644
index 000000000000..cd826b913557
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_rfc4884.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static const unsigned short src_port = 44444;
+static const unsigned short dst_port = 55555;
+static const int min_orig_dgram_len = 128;
+static const int min_payload_len_v4 =
+ min_orig_dgram_len - sizeof(struct iphdr) - sizeof(struct udphdr);
+static const int min_payload_len_v6 =
+ min_orig_dgram_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr);
+static const uint8_t orig_payload_byte = 0xAA;
+
+struct sockaddr_inet {
+ union {
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ struct sockaddr sa;
+ };
+ socklen_t len;
+};
+
+struct ip_case_info {
+ int domain;
+ int level;
+ int opt1;
+ int opt2;
+ int proto;
+ int (*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len);
+ int min_payload;
+};
+
+static int bringup_loopback(void)
+{
+ struct ifreq ifr = {
+ .ifr_name = "lo"
+ };
+ int fd;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -1;
+
+ if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)
+ goto err;
+
+ ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+
+ if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0)
+ goto err;
+
+ close(fd);
+ return 0;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static uint16_t csum(const void *buf, size_t len)
+{
+ const uint8_t *data = buf;
+ uint32_t sum = 0;
+
+ while (len > 1) {
+ sum += (data[0] << 8) | data[1];
+ data += 2;
+ len -= 2;
+ }
+
+ if (len == 1)
+ sum += data[0] << 8;
+
+ while (sum >> 16)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum & 0xFFFF;
+}
+
+static int poll_err(int fd)
+{
+ struct pollfd pfd;
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = fd;
+
+ if (poll(&pfd, 1, 5000) != 1 || pfd.revents != POLLERR)
+ return -1;
+
+ return 0;
+}
+
+static void set_addr(struct sockaddr_inet *addr, int domain,
+ unsigned short port)
+{
+ memset(addr, 0, sizeof(*addr));
+
+ switch (domain) {
+ case AF_INET:
+ addr->v4.sin_family = AF_INET;
+ addr->v4.sin_port = htons(port);
+ addr->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr->len = sizeof(addr->v4);
+ break;
+ case AF_INET6:
+ addr->v6.sin6_family = AF_INET6;
+ addr->v6.sin6_port = htons(port);
+ addr->v6.sin6_addr = in6addr_loopback;
+ addr->len = sizeof(addr->v6);
+ break;
+ }
+}
+
+static int bind_and_setsockopt(int fd, const struct ip_case_info *info)
+{
+ struct sockaddr_inet addr;
+ int opt = 1;
+
+ set_addr(&addr, info->domain, src_port);
+
+ if (setsockopt(fd, info->level, info->opt1, &opt, sizeof(opt)) < 0)
+ return -1;
+
+ if (setsockopt(fd, info->level, info->opt2, &opt, sizeof(opt)) < 0)
+ return -1;
+
+ return bind(fd, &addr.sa, addr.len);
+}
+
+static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum,
+ bool bad_len, bool smaller_len)
+{
+ struct icmp_extobj_hdr *objh;
+ struct icmp_ext_hdr *exthdr;
+ size_t obj_len, ext_len;
+ uint16_t sum;
+
+ /* Use an object payload of 4 bytes */
+ obj_len = sizeof(*objh) + sizeof(uint32_t);
+ ext_len = sizeof(*exthdr) + obj_len;
+
+ if (ext_len > buflen)
+ return -EINVAL;
+
+ exthdr = (struct icmp_ext_hdr *)buf;
+ objh = (struct icmp_extobj_hdr *)(buf + sizeof(*exthdr));
+
+ exthdr->version = 2;
+ /* When encoding a bad object length, either encode a length too small
+ * to fit the object header or too big to fit in the packet.
+ */
+ if (bad_len)
+ obj_len = smaller_len ? sizeof(*objh) - 1 : obj_len * 2;
+ objh->length = htons(obj_len);
+
+ sum = csum(buf, ext_len);
+ exthdr->checksum = htons(bad_csum ? sum - 1 : sum);
+
+ return ext_len;
+}
+
+static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+ struct udphdr *udph;
+ struct iphdr *iph;
+ size_t len = 0;
+
+ len = sizeof(*iph) + sizeof(*udph) + payload_len;
+ if (len > buflen)
+ return -EINVAL;
+
+ iph = (struct iphdr *)buf;
+ udph = (struct udphdr *)(buf + sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl(INADDR_LOOPBACK);
+ iph->daddr = htonl(INADDR_LOOPBACK);
+ iph->tot_len = htons(len);
+ iph->check = htons(csum(iph, sizeof(*iph)));
+
+ udph->source = htons(src_port);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + payload_len);
+
+ memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte,
+ payload_len);
+
+ return len;
+}
+
+static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+ struct udphdr *udph;
+ struct ipv6hdr *iph;
+ size_t len = 0;
+
+ len = sizeof(*iph) + sizeof(*udph) + payload_len;
+ if (len > buflen)
+ return -EINVAL;
+
+ iph = (struct ipv6hdr *)buf;
+ udph = (struct udphdr *)(buf + sizeof(*iph));
+
+ iph->version = 6;
+ iph->payload_len = htons(sizeof(*udph) + payload_len);
+ iph->nexthdr = IPPROTO_UDP;
+ iph->saddr = in6addr_loopback;
+ iph->daddr = in6addr_loopback;
+
+ udph->source = htons(src_port);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + payload_len);
+
+ memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte,
+ payload_len);
+
+ return len;
+}
+
+static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len)
+{
+ struct icmphdr *icmph;
+ int len, ret;
+
+ len = sizeof(*icmph);
+ memset(buf, 0, buflen);
+
+ icmph = (struct icmphdr *)buf;
+ icmph->type = ICMP_DEST_UNREACH;
+ icmph->code = ICMP_PORT_UNREACH;
+ icmph->checksum = 0;
+
+ ret = build_orig_dgram_v4(buf + len, buflen - len, payload_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+
+ icmph->un.reserved[1] = (len - sizeof(*icmph)) / sizeof(uint32_t);
+
+ if (with_ext) {
+ ret = build_rfc4884_ext(buf + len, buflen - len,
+ bad_csum, bad_len, smaller_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ }
+
+ icmph->checksum = htons(csum(icmph, len));
+ return len;
+}
+
+static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len)
+{
+ struct icmp6hdr *icmph;
+ int len, ret;
+
+ len = sizeof(*icmph);
+ memset(buf, 0, buflen);
+
+ icmph = (struct icmp6hdr *)buf;
+ icmph->icmp6_type = ICMPV6_DEST_UNREACH;
+ icmph->icmp6_code = ICMPV6_PORT_UNREACH;
+ icmph->icmp6_cksum = 0;
+
+ ret = build_orig_dgram_v6(buf + len, buflen - len, payload_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+
+ icmph->icmp6_datagram_len = (len - sizeof(*icmph)) / sizeof(uint64_t);
+
+ if (with_ext) {
+ ret = build_rfc4884_ext(buf + len, buflen - len,
+ bad_csum, bad_len, smaller_len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ }
+
+ icmph->icmp6_cksum = htons(csum(icmph, len));
+ return len;
+}
+
+FIXTURE(rfc4884) {};
+
+FIXTURE_SETUP(rfc4884)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ }
+
+ ret = bringup_loopback();
+ ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface");
+}
+
+FIXTURE_TEARDOWN(rfc4884)
+{
+}
+
+const struct ip_case_info ipv4_info = {
+ .domain = AF_INET,
+ .level = SOL_IP,
+ .opt1 = IP_RECVERR,
+ .opt2 = IP_RECVERR_RFC4884,
+ .proto = IPPROTO_ICMP,
+ .build_func = build_icmpv4_pkt,
+ .min_payload = min_payload_len_v4,
+};
+
+const struct ip_case_info ipv6_info = {
+ .domain = AF_INET6,
+ .level = SOL_IPV6,
+ .opt1 = IPV6_RECVERR,
+ .opt2 = IPV6_RECVERR_RFC4884,
+ .proto = IPPROTO_ICMPV6,
+ .build_func = build_icmpv6_pkt,
+ .min_payload = min_payload_len_v6,
+};
+
+FIXTURE_VARIANT(rfc4884) {
+ /* IPv4/v6 related information */
+ struct ip_case_info info;
+ /* Whether to append an ICMP extension or not */
+ bool with_ext;
+ /* UDP payload length */
+ int payload_len;
+ /* Whether to generate a bad checksum in the ICMP extension structure */
+ bool bad_csum;
+ /* Whether to generate a bad length in the ICMP object header */
+ bool bad_len;
+ /* Whether it is too small to fit the object header or too big to fit
+ * in the packet
+ */
+ bool smaller_len;
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not raise
+ * the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not
+ * raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+static void
+check_rfc4884_offset(struct __test_metadata *_metadata, int sock,
+ const FIXTURE_VARIANT(rfc4884) *v)
+{
+ char rxbuf[1024];
+ char ctrl[1024];
+ struct iovec iov = {
+ .iov_base = rxbuf,
+ .iov_len = sizeof(rxbuf)
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = ctrl,
+ .msg_controllen = sizeof(ctrl),
+ };
+ struct cmsghdr *cmsg;
+ int recv;
+
+ ASSERT_EQ(poll_err(sock), 0);
+
+ recv = recvmsg(sock, &msg, MSG_ERRQUEUE);
+ ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ bool is_invalid, expected_invalid;
+ struct sock_extended_err *ee;
+ int expected_off;
+ uint16_t off;
+
+ if (cmsg->cmsg_level != v->info.level ||
+ cmsg->cmsg_type != v->info.opt1) {
+ TH_LOG("Unrelated cmsgs were encountered in recvmsg()");
+ continue;
+ }
+
+ ee = (struct sock_extended_err *)CMSG_DATA(cmsg);
+ off = ee->ee_rfc4884.len;
+ is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID;
+
+ expected_invalid = v->bad_csum || v->bad_len;
+ ASSERT_EQ(is_invalid, expected_invalid) {
+ TH_LOG("Expected invalidity flag to be %d, but got %d",
+ expected_invalid, is_invalid);
+ }
+
+ expected_off =
+ (v->with_ext && v->payload_len >= v->info.min_payload) ?
+ v->payload_len : 0;
+ ASSERT_EQ(off, expected_off) {
+ TH_LOG("Expected RFC4884 offset %u, got %u",
+ expected_off, off);
+ }
+ break;
+ }
+}
+
+TEST_F(rfc4884, rfc4884)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ uint8_t pkt[1024];
+ int dgram, raw;
+ int len, sent;
+ int err;
+
+ dgram = socket(v->info.domain, SOCK_DGRAM, 0);
+ ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed");
+
+ err = bind_and_setsockopt(dgram, &v->info);
+ ASSERT_EQ(err, 0) TH_LOG("Bind failed");
+
+ raw = socket(v->info.domain, SOCK_RAW, v->info.proto);
+ ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed");
+
+ len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len,
+ v->bad_csum, v->bad_len, v->smaller_len);
+ ASSERT_GT(len, 0) TH_LOG("Building packet failed");
+
+ set_addr(&addr, v->info.domain, 0);
+ sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len);
+ ASSERT_EQ(len, sent) TH_LOG("Sending packet failed");
+
+ check_rfc4884_offset(_metadata, dgram, v);
+
+ close(dgram);
+ close(raw);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 845c26dd01a9..b2b99889942f 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -273,8 +273,8 @@ setup()
ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null
ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null
- ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha link set veth0 up &>/dev/null
ip -netns $ioam_node_alpha link set lo up &>/dev/null
ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 0ccf484b1d9d..f4afef51b930 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -43,6 +43,10 @@
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#endif
+
#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
#define MAX_PAYLOAD 2048
#define XFRM_ALGO_KEY_BUF_SIZE 512
@@ -827,13 +831,16 @@ static int xfrm_fill_key(char *name, char *buf,
static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
struct xfrm_desc *desc)
{
- struct {
+ union {
union {
struct xfrm_algo alg;
struct xfrm_algo_aead aead;
struct xfrm_algo_auth auth;
} u;
- char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ struct {
+ unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)];
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ };
} alg = {};
size_t alen, elen, clen, aelen;
unsigned short type;
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index 27437590eeb5..e28884ce3ab3 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
cm->cmsg_level, cm->cmsg_type);
if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
- error(1, 0, "cmsg: len=%lu expected=%lu",
+ error(1, 0, "cmsg: len=%zu expected=%zu",
cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
aux = (void *)CMSG_DATA(cm);
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 531e7fa1b3ea..6cdfb8afccb5 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -8,7 +8,7 @@ import time
import traceback
from collections import namedtuple
from .consts import KSFT_MAIN_NAME
-from .utils import global_defer_queue
+from . import utils
KSFT_RESULT = None
KSFT_RESULT_ALL = True
@@ -32,8 +32,23 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ """
+ Print logs to stdout.
+
+ Behaves like print() but log lines will be prefixed
+ with # to prevent breaking the TAP output formatting.
+
+ Extra arguments (on top of what print() supports):
+ line_pfx - add extra string before each line
+ """
+ sep = kwargs.pop("sep", " ")
+ pfx = kwargs.pop("line_pfx", "")
+ pfx = "#" + (" " + pfx if pfx else "")
kwargs["flush"] = True
- print("#", *objs, **kwargs)
+
+ text = sep.join(str(obj) for obj in objs)
+ prefixed = f"\n{pfx} ".join(text.split('\n'))
+ print(pfx, prefixed, **kwargs)
def _fail(*args):
@@ -153,21 +168,24 @@ def ktap_result(ok, cnt=1, case_name="", comment=""):
print(res, flush=True)
+def _ksft_defer_arm(state):
+ """ Allow or disallow the use of defer() """
+ utils.GLOBAL_DEFER_ARMED = state
+
+
def ksft_flush_defer():
global KSFT_RESULT
i = 0
- qlen_start = len(global_defer_queue)
- while global_defer_queue:
+ qlen_start = len(utils.GLOBAL_DEFER_QUEUE)
+ while utils.GLOBAL_DEFER_QUEUE:
i += 1
- entry = global_defer_queue.pop()
+ entry = utils.GLOBAL_DEFER_QUEUE.pop()
try:
entry.exec_only()
except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Defer Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|")
KSFT_RESULT = False
@@ -315,6 +333,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
comment = ""
cnt_key = ""
+ _ksft_defer_arm(True)
try:
func(*args)
except KsftSkipEx as e:
@@ -325,20 +344,17 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cnt_key = 'xfail'
except BaseException as e:
stop |= isinstance(e, KeyboardInterrupt)
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if stop:
ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
+ _ksft_defer_arm(False)
try:
ksft_flush_defer()
except BaseException as e:
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if isinstance(e, KeyboardInterrupt):
ksft_pr()
ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 106ee1f2df86..85884f3e827b 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -41,7 +41,9 @@ class cmd:
self.ret = None
self.ksft_term_fd = None
+ self.host = host
self.comm = comm
+
if host:
self.proc = host.cmd(comm)
else:
@@ -99,6 +101,27 @@ class cmd:
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
(self.proc.args, stdout, stderr), self)
+ def __repr__(self):
+ def str_fmt(name, s):
+ name += ': '
+ return (name + s.strip().replace('\n', '\n' + ' ' * len(name)))
+
+ ret = "CMD"
+ if self.host:
+ ret += "[remote]"
+ if self.ret is None:
+ ret += f" (unterminated): {self.comm}\n"
+ elif self.ret == 0:
+ ret += f" (success): {self.comm}\n"
+ else:
+ ret += f": {self.comm}\n"
+ ret += f" EXIT: {self.ret}\n"
+ if self.stdout:
+ ret += str_fmt(" STDOUT", self.stdout) + "\n"
+ if self.stderr:
+ ret += str_fmt(" STDERR", self.stderr) + "\n"
+ return ret.strip()
+
class bkg(cmd):
"""
@@ -137,11 +160,12 @@ class bkg(cmd):
def __exit__(self, ex_type, ex_value, ex_tb):
# Force termination on exception
- terminate = self.terminate or (self._exit_wait and ex_type)
+ terminate = self.terminate or (self._exit_wait and ex_type is not None)
return self.process(terminate=terminate, fail=self.check_fail)
-global_defer_queue = []
+GLOBAL_DEFER_QUEUE = []
+GLOBAL_DEFER_ARMED = False
class defer:
@@ -153,7 +177,9 @@ class defer:
self.args = args
self.kwargs = kwargs
- self._queue = global_defer_queue
+ if not GLOBAL_DEFER_ARMED:
+ raise Exception("defer queue not armed, did you use defer() outside of a test case?")
+ self._queue = GLOBAL_DEFER_QUEUE
self._queue.append(self)
def __enter__(self):
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 4dd6278cd3dd..22ba0da2adb8 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,7 @@ TEST_PROGS := \
mptcp_connect_checksum.sh \
mptcp_connect_mmap.sh \
mptcp_connect_sendfile.sh \
+ mptcp_connect_splice.sh \
mptcp_join.sh \
mptcp_sockopt.sh \
pm_netlink.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 10f6f99cfd4e..cbe573c4ab3a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -52,6 +52,7 @@ enum cfg_mode {
CFG_MODE_POLL,
CFG_MODE_MMAP,
CFG_MODE_SENDFILE,
+ CFG_MODE_SPLICE,
};
enum cfg_peek {
@@ -124,7 +125,7 @@ static void die_usage(void)
fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
"-- for MPJ tests\n");
fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
- fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr, "\t-p num -- use port num\n");
@@ -258,7 +259,7 @@ static void set_transparent(int fd, int pf)
}
}
-static void set_mptfo(int fd, int pf)
+static void set_mptfo(int fd)
{
int qlen = 25;
@@ -335,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
set_transparent(sock, pf);
if (cfg_sockopt_types.mptfo)
- set_mptfo(sock, pf);
+ set_mptfo(sock);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -406,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr,
*peer = a;
break; /* success */
}
+ perror("sendto()");
} else {
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a;
break; /* success */
}
- }
- if (cfg_sockopt_types.mptfo) {
- perror("sendto()");
- close(sock);
- sock = -1;
- } else {
perror("connect()");
- close(sock);
- sock = -1;
}
+
+ /* error */
+ close(sock);
+ sock = -1;
}
freeaddrinfo(addr);
@@ -935,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
+static int do_splice(const int infd, const int outfd, const size_t len,
+ struct wstate *winfo)
+{
+ ssize_t in_bytes, out_bytes;
+ int pipefd[2];
+ int err;
+
+ err = pipe(pipefd);
+ if (err) {
+ perror("pipe");
+ return 2;
+ }
+
+again:
+ in_bytes = splice(infd, NULL, pipefd[1], NULL, len - winfo->total_len,
+ SPLICE_F_MOVE | SPLICE_F_MORE);
+ if (in_bytes < 0) {
+ perror("splice in");
+ err = 3;
+ } else if (in_bytes > 0) {
+ out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes,
+ SPLICE_F_MOVE | SPLICE_F_MORE);
+ if (out_bytes < 0) {
+ perror("splice out");
+ err = 4;
+ } else if (in_bytes != out_bytes) {
+ fprintf(stderr, "Unexpected transfer: %zu vs %zu\n",
+ in_bytes, out_bytes);
+ err = 5;
+ } else {
+ goto again;
+ }
+ }
+
+ close(pipefd[0]);
+ close(pipefd[1]);
+
+ return err;
+}
+
+static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size,
+ bool *in_closed_after_out, struct wstate *winfo)
+{
+ int err;
+
+ if (listen_mode) {
+ err = do_splice(peerfd, outfd, size, winfo);
+ if (err)
+ return err;
+
+ err = do_splice(infd, peerfd, size, winfo);
+ } else {
+ err = do_splice(infd, peerfd, size, winfo);
+ if (err)
+ return err;
+
+ shut_wr(peerfd);
+
+ err = do_splice(peerfd, outfd, size, winfo);
+ *in_closed_after_out = true;
+ }
+
+ return err;
+}
+
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
bool in_closed_after_out = false;
@@ -967,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct
&in_closed_after_out, winfo);
break;
+ case CFG_MODE_SPLICE:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+ ret = copyfd_io_splice(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
+ break;
+
default:
fprintf(stderr, "Invalid mode %d\n", cfg_mode);
@@ -1296,8 +1367,8 @@ void xdisconnect(int fd)
int main_loop(void)
{
+ struct addrinfo *peer = NULL;
int fd = 0, ret, fd_in = 0;
- struct addrinfo *peer;
struct wstate winfo;
if (cfg_input && cfg_sockopt_types.mptfo) {
@@ -1380,12 +1451,15 @@ int parse_mode(const char *mode)
return CFG_MODE_MMAP;
if (!strcasecmp(mode, "sendfile"))
return CFG_MODE_SENDFILE;
+ if (!strcasecmp(mode, "splice"))
+ return CFG_MODE_SPLICE;
fprintf(stderr, "Unknown test mode: %s\n", mode);
fprintf(stderr, "Supported modes are:\n");
fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+ fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n");
die_usage();
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
new file mode 100755
index 000000000000..241254a966c9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 8e0b1b8d84b6..5e222ba977e4 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -1,21 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025, Kylin Software */
-#include <linux/sock_diag.h>
-#include <linux/rtnetlink.h>
-#include <linux/inet_diag.h>
-#include <linux/netlink.h>
-#include <linux/compiler.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
#include <sys/socket.h>
-#include <netinet/in.h>
-#include <linux/tcp.h>
+
#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <stdio.h>
+#include <netinet/in.h>
+
+#include <linux/compiler.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/tcp.h>
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e70d3420954f..dc1f200aaa81 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -603,8 +603,7 @@ wait_rm_addr()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_addr_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
@@ -623,25 +622,22 @@ wait_rm_sf()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_sf_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
}
+# $1: expected MPJ ACK Rx counter in $ns1
wait_mpj()
{
- local ns="${1}"
- local cnt old_cnt
-
- old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ local exp_cnt="${1}"
+ local cnt
- local i
- for i in $(seq 10); do
- cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
- [ "$cnt" = "${old_cnt}" ] || break
+ for _ in $(seq 10); do
+ cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ [ "${cnt}" = "${exp_cnt}" ] && break
sleep 0.1
done
}
@@ -650,8 +646,7 @@ wait_ll_ready()
{
local ns="${1}"
- local i
- for i in $(seq 50); do
+ for _ in $(seq 50); do
ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
grep -qw "tentative" || break
sleep 0.1
@@ -1407,7 +1402,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$create" ]; then
rc=${KSFT_FAIL}
print_check "syn tx create socket error"
@@ -1416,7 +1411,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$bind" ]; then
rc=${KSFT_FAIL}
print_check "syn tx bind error"
@@ -1425,7 +1420,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$connect" ]; then
rc=${KSFT_FAIL}
print_check "syn tx connect error"
@@ -1451,7 +1446,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$infinite_map_tx" ]; then
rc=${KSFT_FAIL}
print_check "$ns infinite map tx fallback"
@@ -1460,7 +1455,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss_corruption" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss corruption fallback"
@@ -1469,7 +1464,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$simult_conn" ]; then
rc=${KSFT_FAIL}
print_check "$ns simult conn fallback"
@@ -1478,7 +1473,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_passive" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc passive fallback"
@@ -1487,7 +1482,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_active" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc active fallback"
@@ -1496,7 +1491,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_data" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc data fallback"
@@ -1505,7 +1500,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$md5_sig" ]; then
rc=${KSFT_FAIL}
print_check "$ns MD5 Sig fallback"
@@ -1514,7 +1509,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss fallback"
@@ -1590,7 +1585,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "synack HMAC"
@@ -1599,7 +1594,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$ack_nr" ]; then
rc=${KSFT_FAIL}
print_check "ack rx"
@@ -1608,7 +1603,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "ack HMAC"
@@ -1617,7 +1612,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$syn_rej" ]; then
rc=${KSFT_FAIL}
print_check "syn rejected"
@@ -1650,7 +1645,6 @@ chk_stale_nr()
local stale_min=$2
local stale_max=$3
local stale_delta=$4
- local dump_stats
local stale_nr
local recover_nr
@@ -1666,16 +1660,11 @@ chk_stale_nr()
fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
" expected stale in range [$stale_min..$stale_max]," \
" stale-recover delta $stale_delta"
- dump_stats=1
+ echo $ns stats
+ ip -n $ns -s link show
else
print_ok
fi
-
- if [ "${dump_stats}" = 1 ]; then
- echo $ns stats
- ip netns exec $ns ip -s link show
- ip netns exec $ns nstat -as | grep MPTcp
- fi
}
chk_add_nr()
@@ -3718,7 +3707,6 @@ userspace_pm_add_addr()
tk=$(mptcp_lib_evts_get_info token "$evts")
ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
- sleep 1
}
# $1: ns ; $2: id
@@ -3749,7 +3737,6 @@ userspace_pm_add_sf()
ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
rip $da rport $dp token $tk
- sleep 1
}
# $1: ns ; $2: addr $3: event type
@@ -3999,9 +3986,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
userspace_pm_add_addr $ns1 10.0.3.1 20
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2
chk_join_nr 2 2 2
chk_add_nr 2 2
chk_mptcp_info subflows 2 subflows 2
@@ -4032,8 +4021,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4060,10 +4050,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
userspace_pm_add_sf $ns2 10.0.3.2 0
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
userspace_pm_chk_dump_addr "${ns2}" \
"id 0 flags subflow 10.0.3.2" "id 0 subflow"
chk_join_nr 1 1 1
@@ -4081,8 +4072,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4105,8 +4097,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_mptcp_info subflows 1 subflows 1
@@ -4133,6 +4126,7 @@ userspace_tests()
local tests_pid=$!
wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4158,7 +4152,7 @@ endpoint_tests()
{
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
- if reset "implicit EP" &&
+ if reset_with_events "implicit EP" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
@@ -4167,7 +4161,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 1 flags implicit
chk_mptcp_info subflows 1 subflows 1
@@ -4181,6 +4175,7 @@ endpoint_tests()
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
mptcp_lib_kill_group_wait $tests_pid
+ kill_events_pids
fi
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
@@ -4194,7 +4189,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete id 2" 2
@@ -4206,7 +4201,7 @@ endpoint_tests()
chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- wait_mpj $ns2
+ wait_mpj 2
chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
@@ -4218,7 +4213,7 @@ endpoint_tests()
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_del_endpoint $ns2 3 10.0.3.2
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
@@ -4230,7 +4225,7 @@ endpoint_tests()
chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
- wait_mpj $ns2
+ wait_mpj $((3 + i))
chk_subflow_nr "after re-add id 0 ($i)" 3
chk_mptcp_info subflows 3 subflows 3
done
@@ -4272,7 +4267,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns1 10.0.2.1 id 1 flags signal
chk_subflow_nr "before delete" 2
@@ -4288,7 +4283,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
@@ -4300,7 +4295,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
- wait_mpj $ns2
+ wait_mpj 4
chk_subflow_nr "after re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4312,7 +4307,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
- wait_mpj $ns2
+ wait_mpj 5
chk_subflow_nr "after re-re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4361,9 +4356,9 @@ endpoint_tests()
wait_rm_addr $ns2 0
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 1
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 2
mptcp_lib_kill_group_wait $tests_pid
join_syn_tx=3 join_connect_err=1 \
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 12ce61fa15a8..979cff56e1f5 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -29,6 +29,7 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_SCTP=m
CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_TUNNEL=m
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4e..7a34ef468975 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,33 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
ip -net "$nsr1" route change default via 192.168.100.2
ip -net "$nsr2" route change default via 192.168.100.1
+
+# do not use "route change" and delete old default so
+# socat fails to connect in case new default can't be added.
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:3::2
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:3::1
ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
@@ -611,28 +628,53 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Create vlan tagged devices for IPIP traffic.
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
ip -net "$nsr1" link set veth1.10 up
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr2" link set veth0.10 up
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:5::1
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +682,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Restore the previous configuration
ip -net "$nsr1" route change default via 192.168.10.2
ip -net "$nsr2" route change default via 192.168.10.1
ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
}
# Another test:
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 6136ceec45e0..139bc1211878 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -510,7 +510,7 @@ EOF
udp_listener_ready()
{
- ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+ ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2"
}
output_files_written()
@@ -518,7 +518,7 @@ output_files_written()
test -s "$1" && test -s "$2"
}
-test_udp_ct_race()
+test_udp_nat_race()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
@@ -545,8 +545,8 @@ EOF
ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
local nfqpid=$!
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
# Send two packets, one should end up in ns1, other in ns2.
@@ -557,7 +557,7 @@ EOF
busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
- kill "$nfqpid"
+ kill "$nfqpid" "$rpid1" "$rpid2"
if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
echo "FAIL: Expected One udp conntrack entry"
@@ -585,6 +585,135 @@ EOF
echo "PASS: both udp receivers got one packet each"
}
+# Make sure UDPGRO aggregated packets don't lose
+# their skb->nfct entry when nfqueue passes the
+# skb to userspace with software gso segmentation on.
+test_udp_gro_ct()
+{
+ local errprefix="FAIL: test_udp_gro_ct:"
+
+ ip netns exec "$nsrouter" conntrack -F 2>/dev/null
+
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+ # Number of packets/bytes queued to userspace
+ counter toqueue { }
+ # Number of packets/bytes reinjected from userspace with 'ct new' intact
+ counter fromqueue { }
+ # These two counters should be identical and not 0.
+
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+
+ # userspace sends small packets, if < 1000, UDPGRO did
+ # not kick in, but test needs a 'new' conntrack with udpgro skb.
+ meta iifname veth0 meta l4proto udp meta length > 1000 accept
+
+ # don't pick up non-gso packets and don't queue them to
+ # userspace.
+ notrack
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0; policy accept;
+
+ # Only queue unconfirmed fraglist gro skbs to userspace.
+ udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1
+ }
+
+ chain validate {
+ type filter hook postrouting priority 1; policy accept;
+ # ... and only count those that were reinjected with the
+ # skb->nfct intact.
+ mark 1 counter name "fromqueue"
+ }
+}
+EOF
+ timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" &
+ local nfqpid=$!
+
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
+
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ local bs=512
+ local count=$(((32 * 1024 * 1024) / bs))
+ dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do
+ timeout 5 ip netns exec "$ns1" \
+ socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
+ done
+
+ busywait 10000 test -s "$TMPFILE1"
+
+ kill "$rpid"
+
+ wait
+
+ local p
+ local b
+ local pqueued
+ local bqueued
+
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets)
+ read p pqueued b bqueued <<EOF
+$c
+EOF
+ local preinject
+ local breinject
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets)
+ read p preinject b breinject <<EOF
+$c
+EOF
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off
+ ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off
+
+ if [ "$pqueued" -eq 0 ];then
+ # happens when gro did not build at least on aggregate
+ echo "SKIP: No packets were queued"
+ return
+ fi
+
+ local saw_ct_entry=0
+ if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then
+ saw_ct_entry=1
+ else
+ echo "$errprefix Expected udp conntrack entry"
+ ip netns exec "$nsrouter" conntrack -L
+ ret=1
+ fi
+
+ if [ "$pqueued" -ge "$preinject" ] ;then
+ echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject"
+ ret=1
+ return
+ fi
+
+ # sw segmentation adds extra udp and ip headers.
+ local breinject_expect=$((preinject * (512 + 20 + 8)))
+
+ if [ "$breinject" -eq "$breinject_expect" ]; then
+ if [ "$saw_ct_entry" -eq 1 ];then
+ echo "PASS: fraglist gro skb passed with conntrack entry"
+ else
+ echo "$errprefix fraglist gro skb passed without conntrack entry"
+ ret=1
+ fi
+ else
+ echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect"
+ ret=1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+ echo "$errprefix: Could not delete udpq table"
+ ret=1
+ fi
+}
+
test_queue_removal()
{
read tainted_then < /proc/sys/kernel/tainted
@@ -663,7 +792,8 @@ test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
-test_udp_ct_race
+test_udp_nat_race
+test_udp_gro_ct
# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
new file mode 100644
index 000000000000..07e9936e70e6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
@@ -0,0 +1,24 @@
+// 3rd ACK + 1st data segment lost, data segments with ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
+// 1st data segment lost
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001>
++.002 accept(3, ..., ...) = 4
+
++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop>
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264
++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
new file mode 100644
index 000000000000..76b8422b34dc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
@@ -0,0 +1,30 @@
+// 3rd ACK + 1st data segment lost, 2nd data segments with ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 2000) = 2000
+// 1st data segment lost + 2nd gets CE
++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1, nop>
++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
new file mode 100644
index 000000000000..84060e490589
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
@@ -0,0 +1,19 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Our code currently sends a challenge ACK
+// when it receives a SYN in ESTABLISHED state
+// based on the latest SYN
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
new file mode 100644
index 000000000000..d3fe09d0606f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
@@ -0,0 +1,18 @@
+// Third ACK CE increases r.cep
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
new file mode 100644
index 000000000000..d28722db42b1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
@@ -0,0 +1,22 @@
+// 3rd ACK lost, CE for the first data segment
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1 ,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
new file mode 100644
index 000000000000..a4d808116e34
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
@@ -0,0 +1,26 @@
+// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// SYN/ACK rexmitted => two 3rd ACKs in-flight
++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Delivered 1st 3rd ACK
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Duplicate 3rd ACK delivered
++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
new file mode 100644
index 000000000000..410a303c6d49
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
@@ -0,0 +1,13 @@
+// Test that when accurate ECN is disabled,
+// client uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
new file mode 100644
index 000000000000..10728114b11b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
new file mode 100644
index 000000000000..04d928f0d44d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
new file mode 100644
index 000000000000..788af6bea69c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
@@ -0,0 +1,34 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox drops AccECN option and client
+// reverts to ACE flags only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+sysctl -q net.ipv4.tcp_ecn_option_beacon=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 2001
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
new file mode 100644
index 000000000000..f5839c2e682d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
@@ -0,0 +1,38 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox accepts AccECN option but some packets
+// are lost due to congestion. Client should
+// continue to send AccECN option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.102 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Send
++0.01 write(4, ..., 3000) = 3000
++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// First two segments were lost due to congestion as SACK was
+// received acknowledging 3rd segment
++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001>
+
+// Since data with option was SACKed, we can
+// continue to use AccECN option for the rest of
+// the connection. This one is a rexmt
++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop>
+
+// Send new data, it should contain AccECN option
++0.01 write(4, ..., 2000) = 2000
++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
new file mode 100644
index 000000000000..c00b36d6a833
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
@@ -0,0 +1,12 @@
+// AccECN sysctl server-side only, no ECN/AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
new file mode 100644
index 000000000000..f9c27f39f354
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
@@ -0,0 +1,25 @@
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > [noecn] SEWA 0:0(0) <...>
+ +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > [ect0] EW. 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > [ect0] P5. 1:1001(1000) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > [ect0] F5. 1001:1001(0) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1002 win 257
+
+ +0 < [ect0] F5. 1:1(0) ack 1002 win 257
+ +0 > [ect0] . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
new file mode 100644
index 000000000000..6d771234124a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 1461 win 264
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
new file mode 100644
index 000000000000..76384f52b021
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
@@ -0,0 +1,31 @@
+// Test false overflow detection with option used to rule out overflow
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
+// Stop sending option to allow easier testing
++0 `sysctl -q net.ipv4.tcp_ecn_option=0`
+
++0.002 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+
++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop>
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
new file mode 100644
index 000000000000..8bce5dce35a2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
@@ -0,0 +1,24 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
new file mode 100644
index 000000000000..5f2b147214f4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .6 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
new file mode 100644
index 000000000000..fd07bdc14f37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (at ACE field max delta)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .4 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
new file mode 100644
index 000000000000..cb1e70ff2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
@@ -0,0 +1,70 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE
++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect0
++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect1
++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 3, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
new file mode 100644
index 000000000000..6627c7bb2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
@@ -0,0 +1,12 @@
+// Test that tcp_ecn=4 uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=4
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.05 connect(4, ..., ...) = 0
+
++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
new file mode 100644
index 000000000000..51879477bb50
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
@@ -0,0 +1,35 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
new file mode 100644
index 000000000000..0c72fa4a1251
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
@@ -0,0 +1,14 @@
+// Test IP flags drop
+--tolerance_usecs=50000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 1.1 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
new file mode 100644
index 000000000000..171f9433e55f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
@@ -0,0 +1,16 @@
+// SYN/ACK option drop test
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
new file mode 100644
index 000000000000..0f65cf56cd2b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK maybe getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK after sending 3rd retransmission, not a blackhole
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
new file mode 100644
index 000000000000..343181633980
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags and without
+// ACE flags got dropped. Although we disable
+// ECN, we shouldn't consider this as blackholed
+// as these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 3.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
new file mode 100644
index 000000000000..37dabc4603c8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
@@ -0,0 +1,23 @@
+// Test AccECN flags bleach
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
new file mode 100644
index 000000000000..5b14892fda51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
new file mode 100644
index 000000000000..25f7cb2feb25
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
@@ -0,0 +1,26 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
new file mode 100644
index 000000000000..50e08c492a69
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation without option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
new file mode 100644
index 000000000000..2904f1ba9975
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation, late option enable
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
new file mode 100644
index 000000000000..64e0fc1c1f14
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
@@ -0,0 +1,20 @@
+// Test client behavior on receiving a non ECN SYN-ACK
+// after receiving an AccECN SYN-ACK and moving to
+// ESTABLISHED state
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+// Receive an AccECN SYN-ACK and move to ESTABLISHED
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
new file mode 100644
index 000000000000..f407c629a3f7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
@@ -0,0 +1,27 @@
+// Test basic AccECN negotiation with option off using sysctl
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
new file mode 100644
index 000000000000..32454e7187f9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
@@ -0,0 +1,27 @@
+// Test no progress filtering
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
new file mode 100644
index 000000000000..6597d5f2d778
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
new file mode 100644
index 000000000000..0f97dfcfa82d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 fallback when sysctl asks for AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
new file mode 100644
index 000000000000..9baffdd66fe5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 ECN when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
new file mode 100644
index 000000000000..3fc56f9c6a6f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
@@ -0,0 +1,28 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001>
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001>
+// DSACK works?
++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001>
++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
new file mode 100644
index 000000000000..1c075b5d81ae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
@@ -0,0 +1,39 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// One SACK block should allow all 3 AccECN fields:
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving.
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001>
+
+// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001>
+
+// Check the case where the AccECN option doesn't fit, because sending ect0
+// with order 1 would rquire 3 AccECN fields,
+// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes.
+// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0.
++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
new file mode 100644
index 000000000000..6b88ab78bfce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
new file mode 100644
index 000000000000..d24ada008ece
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 0|0|1
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++0 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
new file mode 100644
index 000000000000..a20d7e890ee1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
@@ -0,0 +1,19 @@
+// Test against broken server (1|1|1)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
new file mode 100644
index 000000000000..428255bedab7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
@@ -0,0 +1,19 @@
+// Test against Non ECN server (0|0|0)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
new file mode 100644
index 000000000000..e9a5a0d3677c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
@@ -0,0 +1,18 @@
+// Test AccECN with sysctl set to server-side only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..412fa903105c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got SYN-ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
new file mode 100644
index 000000000000..4622754a2270
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
@@ -0,0 +1,16 @@
+// Test that SYN with ACE flags got dropped
+// We retry one more time with ACE and then
+// fallback to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..ee15f108cafe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,27 @@
+// Test that SYN-ACK with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got the last ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK maybe getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with ACE flags, state should be set to negotiation succeeded
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
new file mode 100644
index 000000000000..ccfe353a8ee4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
@@ -0,0 +1,26 @@
+// Test that SYN-ACK with ACE flags got dropped
+// We retry one more time with ACE and then
+// fallback to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK maybe getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with no ACE flags, state should be set to blackholed
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++0 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
new file mode 100644
index 000000000000..dc83f7a18180
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
new file mode 100644
index 000000000000..e63a8d018c37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
new file mode 100644
index 000000000000..23c0e43b3dbe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
new file mode 100644
index 000000000000..c3497738f680
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
@@ -0,0 +1,27 @@
+// Test SYNACK CE & received_ce update
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301>
+
++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
new file mode 100644
index 000000000000..5fd77f466572
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
@@ -0,0 +1,22 @@
+// Reflected SYNACK CE mark increases delivered_ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_fallback=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// Fake ce for prev, ECT validator must be disabled for this to work
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
new file mode 100644
index 000000000000..f6ad1ea5c0c4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F.5 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
new file mode 100644
index 000000000000..7ecfc5fb9dbb
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F5. 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
new file mode 100644
index 000000000000..9e0959782ef5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
@@ -0,0 +1,15 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
new file mode 100644
index 000000000000..a5a41633af07
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
@@ -0,0 +1,25 @@
+// Test that we retransmit SYN-ACK with ACE and without
+// AccECN options after
+// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// We try to write with AccECN option
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
new file mode 100644
index 000000000000..f3fe2f098966
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
@@ -0,0 +1,26 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
new file mode 100644
index 000000000000..1446799d2481
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
@@ -0,0 +1,25 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
new file mode 100644
index 000000000000..319f81dd717d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal active open.
+// First to close connection.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+ // Connect to server: active open: three-way handshake
+ +0...0 connect(4, ..., ...) = 0
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ // Send data
+ +0 send(4, ..., 1000, 0) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(4) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
new file mode 100644
index 000000000000..e72a291b666e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal passive open.
+// Peer is first to close.
+
+`./defaults.sh`
+
+ // Open listener socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ // Incoming connection: passive open: three-way handshake
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ // Open connection socket and close listener socket
+ +0 accept(3, ..., ...) = 4
+ +0 close(3) = 0
+
+ // Peer sends data: acknowledge and receive
+ +0 < P. 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001
+ +0 recv(4, ..., 1000, 0) = 1000
+
+ // Peer initiates connection close
+ +0 < F. 1001:1001(0) ack 1 win 257
+ +.04 > . 1:1(0) ack 1002
+
+ // Local socket also closes its side
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1002
+ +0 < . 1002:1002(0) ack 2 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
new file mode 100644
index 000000000000..95a1957a2cf9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue"
+
+// This test is about receiving the SCM_TSTAMP_ACK,
+// we do not care about its SCM_TIMESTAMPING precision.
+--tolerance_usecs=1000000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_min_tso_segs=70
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.010 < S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0
+
+ +0 write(3, ..., 9880) = 9880
+ +0 > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700>
++.010 < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200>
+
+ +0 write(3, ..., 19760) = 19760
+ +0 > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701>
++.010 < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201>
+
+ +0 write(3, ..., 39520) = 39520
+ +0 > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702>
++.010 < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202>
+
+// One more write to increase cwnd
+ +0 write(3, ..., 79040) = 79040
+ +0 > P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703>
+ +0 > P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703>
++.010 < . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203>
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have one write filling one skb
+// last byte can not be stored because of our small SO_SNDBUF
+ +0 write(3, ..., 65209) = 65208
+ +0 > P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704>
++.010 < . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204>
+
+// SCM_TSTAMP_ACK should be received after the last ack at
+// t=60ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=60000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=65207}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
index 8d82140f0f76..3b1ee2d3d417 100644
--- a/tools/testing/selftests/net/tfo.c
+++ b/tools/testing/selftests/net/tfo.c
@@ -82,8 +82,10 @@ static void run_server(void)
error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
if (read(connfd, buf, 64) < 0)
- perror("read()");
- fprintf(outfile, "%d\n", opt);
+ error(1, errno, "read()");
+
+ if (fprintf(outfile, "%d\n", opt) < 0)
+ error(1, errno, "fprintf()");
fclose(outfile);
close(connfd);
@@ -92,14 +94,17 @@ static void run_server(void)
static void run_client(void)
{
- int fd;
+ int fd, ret;
char *msg = "Hello, world!";
fd = socket(AF_INET6, SOCK_STREAM, 0);
if (fd == -1)
error(1, errno, "socket()");
- sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
+ (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, errno, "sendto()");
close(fd);
}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
index a4550511830a..f116f888b794 100755
--- a/tools/testing/selftests/net/tfo_passive.sh
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -85,12 +85,15 @@ timeout -k 1s 30s ip netns exec nssv ./tfo \
-s \
-p ${SERVER_PORT} \
-o ${out_file}&
+server_pid="$!"
wait_local_port_listen nssv ${SERVER_PORT} tcp
ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+client_exit_status="$?"
-wait
+wait "$server_pid"
+server_exit_status="$?"
res=$(cat $out_file)
rm $out_file
@@ -101,6 +104,14 @@ if [ "$res" = "0" ]; then
exit 1
fi
+if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then
+ # Note: timeout(1) exits with 124 if it timed out
+ echo "client exited with ${client_exit_status}"
+ echo "server exited with ${server_exit_status}"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index a4d16a460fbe..9e2ccea13d70 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -3260,17 +3260,25 @@ TEST(data_steal) {
ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
/* Spawn a child and get it into the read wait path of the underlying
- * TCP socket.
+ * TCP socket (before kernel .recvmsg is replaced with the TLS one).
*/
pid = fork();
ASSERT_GE(pid, 0);
if (!pid) {
- EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
- sizeof(buf) / 2);
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL),
+ sizeof(buf) / 2 + 1);
exit(!__test_passed(_metadata));
}
- usleep(10000);
+ /* Send a sync byte and poll until it's consumed to ensure
+ * the child is in recv() before we proceed to install TLS.
+ */
+ ASSERT_EQ(send(fd, buf, 1, 0), 1);
+ do {
+ usleep(500);
+ } while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1);
+ EXPECT_EQ(errno, EAGAIN);
+
ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index 0efc67b0357a..8a5cd5cb5472 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -8,14 +8,119 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include "kselftest_harness.h"
+#include "tuntap_helpers.h"
+
+static const char param_dev_geneve_name[] = "geneve1";
+static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98,
+ 0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0x43 };
+static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0x22, 0xcc };
+static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0xdd };
+
+static struct in_addr param_ipaddr4_outer_dst = {
+ __constant_htonl(0xac100001),
+};
+
+static struct in_addr param_ipaddr4_outer_src = {
+ __constant_htonl(0xac100002),
+};
+
+static struct in_addr param_ipaddr4_inner_dst = {
+ __constant_htonl(0xac100101),
+};
+
+static struct in_addr param_ipaddr4_inner_src = {
+ __constant_htonl(0xac100102),
+};
+
+static struct in6_addr param_ipaddr6_outer_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_outer_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+#ifndef BIT
+#define BIT(nr) (1UL << (nr))
+#endif
+
+#define VN_ID 1
+#define VN_PORT 4789
+#define UDP_SRC_PORT 22
+#define UDP_DST_PORT 48878
+#define IPPREFIX_LEN 24
+#define IP6PREFIX_LEN 64
+#define TIMEOUT_SEC 10
+#define TIMEOUT_USEC 100000
+#define MAX_RETRIES 20
+
+#define UDP_TUNNEL_GENEVE_4IN4 0x01
+#define UDP_TUNNEL_GENEVE_6IN4 0x02
+#define UDP_TUNNEL_GENEVE_4IN6 0x04
+#define UDP_TUNNEL_GENEVE_6IN6 0x08
+
+#define UDP_TUNNEL_MAX_SEGMENTS BIT(7)
+
+#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4)
+#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6)
+
+#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN \
+ (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN \
+ (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+
+#define UDP_TUNNEL_HDRLEN(type) \
+ ((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \
+ 0)
+
+#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type))
+#define UDP_TUNNEL_MAX(type, is_tap) \
+ (ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0))
+
+#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel)
+#define MAX_VNET_TUNNEL_PACKET_SZ \
+ (TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \
+ ETH_MAX_MTU)
+
+struct geneve_setup_config {
+ int family;
+ union {
+ struct in_addr r4;
+ struct in6_addr r6;
+ } remote;
+ __be32 vnid;
+ __be16 vnport;
+ unsigned char hwaddr[6];
+ uint8_t csum;
+};
static int tun_attach(int fd, char *dev)
{
@@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_ATTACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_detach(int fd, char *dev)
@@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_DETACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_alloc(char *dev)
@@ -54,7 +159,7 @@ static int tun_alloc(char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
- err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ err = ioctl(fd, TUNSETIFF, (void *)&ifr);
if (err < 0) {
fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
close(fd);
@@ -66,42 +171,315 @@ static int tun_alloc(char *dev)
static int tun_delete(char *dev)
{
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifm;
- unsigned char data[64];
- } req;
- struct rtattr *rta;
- int ret, rtnl;
+ return ip_link_del(dev);
+}
+
+static int tun_open(char *dev, const int flags, const int hdrlen,
+ const int features, const unsigned char *mac_addr)
+{
+ struct ifreq ifr = { 0 };
+ int fd, sk = -1;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return -1;
+ }
+
+ ifr.ifr_flags = flags;
+ if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+ perror("ioctl(TUNSETIFF)");
+ goto err;
+ }
+ strcpy(dev, ifr.ifr_name);
+
+ if (hdrlen > 0) {
+ if (ioctl(fd, TUNSETVNETHDRSZ, &hdrlen) < 0) {
+ perror("ioctl(TUNSETVNETHDRSZ)");
+ goto err;
+ }
+ }
+
+ if (features) {
+ if (ioctl(fd, TUNSETOFFLOAD, features) < 0) {
+ perror("ioctl(TUNSETOFFLOAD)");
+ goto err;
+ }
+ }
+
+ sk = socket(PF_INET, SOCK_DGRAM, 0);
+ if (sk < 0) {
+ perror("socket");
+ goto err;
+ }
+
+ if (ioctl(sk, SIOCGIFFLAGS, &ifr) < 0) {
+ perror("ioctl(SIOCGIFFLAGS)");
+ goto err;
+ }
+
+ ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+ if (ioctl(sk, SIOCSIFFLAGS, &ifr) < 0) {
+ perror("ioctl(SIOCSIFFLAGS)");
+ goto err;
+ }
+
+ if (mac_addr && flags & IFF_TAP) {
+ ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+ memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN);
+
+ if (ioctl(sk, SIOCSIFHWADDR, &ifr) < 0) {
+ perror("ioctl(SIOCSIFHWADDR)");
+ goto err;
+ }
+ }
+
+out:
+ if (sk >= 0)
+ close(sk);
+ return fd;
+
+err:
+ close(fd);
+ fd = -1;
+ goto out;
+}
+
+static size_t sockaddr_len(int family)
+{
+ return (family == AF_INET) ? sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6);
+}
+
+static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data)
+{
+ struct geneve_setup_config *cfg = data;
+
+#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote
+#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6
+
+ rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN);
+ rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid);
+ rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport);
+ rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum);
+
+ if (cfg->family == AF_INET)
+ SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr);
+ else
+ SET_GENEVE_REMOTE6(req, &cfg->remote.r6,
+ sizeof(cfg->remote.r6));
+
+ return 0;
+}
+
+static int geneve_create(const char *dev, int family, void *remote,
+ void *hwaddr)
+{
+ struct geneve_setup_config geneve;
+
+ memset(&geneve, 0, sizeof(geneve));
+ geneve.vnid = VN_ID;
+ geneve.vnport = htons(VN_PORT);
+ geneve.csum = 1;
+ geneve.family = family;
+ if (family == AF_INET)
+ memcpy(&geneve.remote.r4, remote, sizeof(struct in_addr));
+ else
+ memcpy(&geneve.remote.r6, remote, sizeof(struct in6_addr));
+ memcpy(geneve.hwaddr, hwaddr, ETH_ALEN);
+
+ return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&geneve);
+}
+
+static int set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ return setsockopt(fd, level, name, &val, sizeof(val));
+}
+
+static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag,
+ bool do_connect, struct sockaddr_storage *dsa)
+{
+ struct timeval to = { .tv_sec = TIMEOUT_SEC };
+ int fd, family = ssa->ss_family;
+ int salen = sockaddr_len(family);
+
+ fd = socket(family, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -1;
+
+ if (bind(fd, (struct sockaddr *)ssa, salen) < 0) {
+ perror("bind");
+ goto err;
+ }
+
+ if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof(to)) < 0) {
+ perror("setsockopt(SO_RCVTIMEO)");
+ goto err;
+ }
+
+ if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0) {
+ perror("set_pmtu_discover");
+ goto err;
+ }
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type)
+{
+ *(uint8_t *)rtm_type = rsp->_hdr.rtm_type;
+}
+
+static int ip_route_check(const char *intf, int family, void *addr)
+{
+ uint8_t rtm_type, table = RT_TABLE_LOCAL;
+ int retries = MAX_RETRIES;
- rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
- if (rtnl < 0) {
- fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
- return 1;
+ while (retries-- > 0) {
+ if (ip_route_get(intf, family, table, addr, parse_route_rsp,
+ &rtm_type) == 0 &&
+ rtm_type == RTN_LOCAL)
+ break;
+
+ usleep(TIMEOUT_USEC);
}
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
- req.nh.nlmsg_flags = NLM_F_REQUEST;
- req.nh.nlmsg_type = RTM_DELLINK;
+ if (retries < 0)
+ return -1;
+
+ return 0;
+}
+
+static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr,
+ uint8_t *send_buf, int send_len, int gso_size)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
+ int alen = sockaddr_len(addr->ss_family);
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ int ret;
+
+ iov.iov_base = send_buf;
+ iov.iov_len = send_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
- req.ifm.ifi_family = AF_UNSPEC;
+ if (gso_size > 0) {
+ struct cmsghdr *cmsg;
- rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
- rta->rta_type = IFLA_IFNAME;
- rta->rta_len = RTA_LENGTH(IFNAMSIZ);
- req.nh.nlmsg_len += rta->rta_len;
- memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
- ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_UDP;
+ cmsg->cmsg_type = UDP_SEGMENT;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *(uint16_t *)CMSG_DATA(cmsg) = gso_size;
+ }
+
+ ret = sendmsg(socket, &msg, 0);
if (ret < 0)
- fprintf(stderr, "can't send: %s\n", strerror(errno));
- ret = (unsigned int)ret != req.nh.nlmsg_len;
+ perror("sendmsg");
- close(rtnl);
return ret;
}
+static int validate_hdrlen(uint8_t **cur, int *len, int x)
+{
+ if (*len < x)
+ return -1;
+ *cur += x;
+ *len -= x;
+ return 0;
+}
+
+static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type,
+ bool is_tap)
+{
+ struct ipv6hdr *iph6;
+ struct udphdr *udph;
+ struct iphdr *iph4;
+ uint8_t *cur = buf;
+
+ if (validate_hdrlen(&cur, &len, TUN_VNET_TNL_SIZE))
+ return -1;
+
+ if (is_tap) {
+ if (validate_hdrlen(&cur, &len, ETH_HLEN))
+ return -1;
+ }
+
+ if (tunnel_type & UDP_TUNNEL_OUTER_IPV4) {
+ iph4 = (struct iphdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct iphdr)))
+ return -1;
+ if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP)
+ return -1;
+ } else {
+ iph6 = (struct ipv6hdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr)))
+ return -1;
+ if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP)
+ return -1;
+ }
+
+ udph = (struct udphdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct udphdr)))
+ return -1;
+ if (ntohs(udph->dest) != VN_PORT)
+ return -1;
+
+ if (validate_hdrlen(&cur, &len, GENEVE_HLEN))
+ return -1;
+ if (validate_hdrlen(&cur, &len, ETH_HLEN))
+ return -1;
+
+ if (tunnel_type & UDP_TUNNEL_INNER_IPV4) {
+ iph4 = (struct iphdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct iphdr)))
+ return -1;
+ if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP)
+ return -1;
+ } else {
+ iph6 = (struct ipv6hdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr)))
+ return -1;
+ if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP)
+ return -1;
+ }
+
+ udph = (struct udphdr *)cur;
+ if (validate_hdrlen(&cur, &len, sizeof(struct udphdr)))
+ return -1;
+ if (ntohs(udph->dest) != UDP_DST_PORT)
+ return -1;
+
+ return len;
+}
+
FIXTURE(tun)
{
char ifname[IFNAMSIZ];
@@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun)
close(self->fd2);
}
-TEST_F(tun, delete_detach_close) {
+TEST_F(tun, delete_detach_close)
+{
EXPECT_EQ(tun_delete(self->ifname), 0);
EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
EXPECT_EQ(errno, 22);
}
-TEST_F(tun, detach_delete_close) {
+TEST_F(tun, detach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, detach_close_delete) {
+TEST_F(tun, detach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
close(self->fd);
self->fd = -1;
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_delete_close) {
+TEST_F(tun, reattach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_close_delete) {
+TEST_F(tun, reattach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
close(self->fd);
@@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) {
EXPECT_EQ(tun_delete(self->ifname), 0);
}
+FIXTURE(tun_vnet_udptnl)
+{
+ char ifname[IFNAMSIZ];
+ int fd, sock;
+};
+
+FIXTURE_VARIANT(tun_vnet_udptnl)
+{
+ int tunnel_type;
+ int gso_size;
+ int data_size;
+ int r_num_mss;
+ bool is_tap, no_gso;
+};
+
+/* clang-format off */
+#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc) \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) { \
+ /* no GSO: send a single byte */ \
+ .tunnel_type = type, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) { \
+ /* no GSO: send a single MSS, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) { \
+ /* no GSO: send a single MSS + 1B: fail */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) { \
+ /* GSO: send 1 byte, gso 1 byte, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) { \
+ /* send a single MSS: fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) { \
+ /* data <= MSS < gso: will fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type) + 1, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) { \
+ /* GSO: a single MSS + 1B */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) { \
+ /* no GSO: send exactly 2 MSS */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) * 2, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) { \
+ /* GSO: send max bytes */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MAX(type, true), \
+ .r_num_mss = UDP_TUNNEL_MAX(type, true) / \
+ UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) { \
+ /* GSO: send oversize max bytes: fail */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = ETH_MAX_MTU, \
+ .r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) { \
+ /* GSO: send max number of min sized segments */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = UDP_TUNNEL_MAX_SEGMENTS, \
+ .r_num_mss = UDP_TUNNEL_MAX_SEGMENTS, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) { \
+ /* GSO: send 5 bytes, gso 2 bytes */ \
+ .tunnel_type = type, \
+ .gso_size = 2, \
+ .data_size = 5, \
+ .r_num_mss = 3, \
+ .is_tap = true, \
+ } /* clang-format on */
+
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6);
+
+static void assign_ifaddr_vars(int family, int is_outer, void **srcip,
+ void **dstip, void **srcmac, void **dstmac)
+{
+ if (is_outer) {
+ if (family == AF_INET) {
+ *srcip = (void *)&param_ipaddr4_outer_src;
+ *dstip = (void *)&param_ipaddr4_outer_dst;
+ } else {
+ *srcip = (void *)&param_ipaddr6_outer_src;
+ *dstip = (void *)&param_ipaddr6_outer_dst;
+ }
+ *srcmac = param_hwaddr_outer_src;
+ *dstmac = param_hwaddr_outer_dst;
+ } else {
+ if (family == AF_INET) {
+ *srcip = (void *)&param_ipaddr4_inner_src;
+ *dstip = (void *)&param_ipaddr4_inner_dst;
+ } else {
+ *srcip = (void *)&param_ipaddr6_inner_src;
+ *dstip = (void *)&param_ipaddr6_inner_dst;
+ }
+ *srcmac = param_hwaddr_inner_src;
+ *dstmac = param_hwaddr_inner_dst;
+ }
+}
+
+static void assign_sockaddr_vars(int family, int is_outer,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
+{
+ src->ss_family = family;
+ dst->ss_family = family;
+
+ if (family == AF_INET) {
+ struct sockaddr_in *s4 = (struct sockaddr_in *)src;
+ struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
+
+ s4->sin_addr = is_outer ? param_ipaddr4_outer_src :
+ param_ipaddr4_inner_src;
+ d4->sin_addr = is_outer ? param_ipaddr4_outer_dst :
+ param_ipaddr4_inner_dst;
+ if (!is_outer) {
+ s4->sin_port = htons(UDP_SRC_PORT);
+ d4->sin_port = htons(UDP_DST_PORT);
+ }
+ } else {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
+
+ s6->sin6_addr = is_outer ? param_ipaddr6_outer_src :
+ param_ipaddr6_inner_src;
+ d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst :
+ param_ipaddr6_inner_dst;
+ if (!is_outer) {
+ s6->sin6_port = htons(UDP_SRC_PORT);
+ d6->sin6_port = htons(UDP_DST_PORT);
+ }
+ }
+}
+
+FIXTURE_SETUP(tun_vnet_udptnl)
+{
+ int ret, family, prefix, flags, features;
+ int tunnel_type = variant->tunnel_type;
+ struct sockaddr_storage ssa, dsa;
+ void *sip, *dip, *smac, *dmac;
+
+ flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR |
+ IFF_MULTI_QUEUE | IFF_NO_PI;
+ features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO |
+ TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6;
+ self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features,
+ param_hwaddr_outer_src);
+ ASSERT_GE(self->fd, 0);
+
+ family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6;
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(self->ifname, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(self->ifname, family, dip, dmac);
+ ASSERT_EQ(ret, 0);
+ ret = ip_route_check(self->ifname, family, sip);
+ ASSERT_EQ(ret, 0);
+
+ ret = geneve_create(param_dev_geneve_name, family, dip,
+ param_hwaddr_inner_src);
+ ASSERT_EQ(ret, 0);
+
+ family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6;
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac);
+ ASSERT_EQ(ret, 0);
+ ret = ip_route_check(param_dev_geneve_name, family, sip);
+ ASSERT_EQ(ret, 0);
+
+ assign_sockaddr_vars(family, 0, &ssa, &dsa);
+ self->sock = udp_socket_open(&ssa, false, true, &dsa);
+ ASSERT_GE(self->sock, 0);
+}
+
+FIXTURE_TEARDOWN(tun_vnet_udptnl)
+{
+ int ret;
+
+ if (self->sock != -1)
+ close(self->sock);
+
+ ret = ip_link_del(param_dev_geneve_name);
+ EXPECT_EQ(ret, 0);
+
+ ret = tun_delete(self->ifname);
+ EXPECT_EQ(ret, 0);
+}
+
+static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) *
+ variant,
+ uint8_t *buf)
+{
+ int pktlen, hlen, proto, inner_family, outer_family;
+ int tunnel_type = variant->tunnel_type;
+ int payload_len = variant->data_size;
+ int gso_size = variant->gso_size;
+ uint8_t *outer_udph, *cur = buf;
+ void *sip, *dip, *smac, *dmac;
+ bool is_tap = variant->is_tap;
+
+ hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type);
+ inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
+ AF_INET6;
+ outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET :
+ AF_INET6;
+
+ cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size,
+ outer_family, inner_family);
+
+ pktlen = hlen + payload_len;
+ assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac);
+
+ if (is_tap) {
+ proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ pktlen -= ETH_HLEN;
+ cur += build_eth(cur, proto, dmac, smac);
+ }
+
+ if (outer_family == AF_INET) {
+ pktlen = pktlen - sizeof(struct iphdr);
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+ } else {
+ pktlen = pktlen - sizeof(struct ipv6hdr);
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+ }
+
+ outer_udph = cur;
+ assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac);
+
+ pktlen -= sizeof(struct udphdr);
+ proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen);
+ cur += build_geneve_header(cur, VN_ID);
+ cur += build_eth(cur, proto, dmac, smac);
+
+ pktlen = sizeof(struct udphdr) + payload_len;
+ if (inner_family == AF_INET)
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+ else
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+
+ cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len,
+ inner_family, false);
+
+ build_udp_packet_csum(outer_udph, outer_family, false);
+
+ return cur - buf;
+}
+
+static int
+receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+ const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+ int *r_num_mss)
+{
+ uint8_t packet_buf[MAX_VNET_TUNNEL_PACKET_SZ];
+ int len, total_len = 0, socket = self->sock;
+ int payload_len = variant->data_size;
+
+ while (total_len < payload_len) {
+ len = recv(socket, packet_buf, sizeof(packet_buf), 0);
+ if (len <= 0) {
+ if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+ perror("recv");
+ break;
+ }
+
+ (*r_num_mss)++;
+ total_len += len;
+ }
+
+ return total_len;
+}
+
+static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+ const FIXTURE_VARIANT(tun_vnet_udptnl) *
+ variant)
+{
+ int family = (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
+ AF_INET6;
+ uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 };
+ int payload_len = variant->data_size;
+ int gso_size = variant->gso_size;
+ struct sockaddr_storage ssa, dsa;
+
+ assign_sockaddr_vars(family, 0, &ssa, &dsa);
+ return send_gso_udp_msg(self->sock, &dsa, buf, payload_len, gso_size);
+}
+
+static int
+receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self,
+ const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+ struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr)
+{
+ struct timeval timeout = { .tv_sec = TIMEOUT_SEC };
+ uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ];
+ int tunnel_type = variant->tunnel_type;
+ int payload_len = variant->data_size;
+ bool is_tap = variant->is_tap;
+ int ret, len, total_len = 0;
+ int tun_fd = self->fd;
+ fd_set fdset;
+
+ while (total_len < payload_len) {
+ FD_ZERO(&fdset);
+ FD_SET(tun_fd, &fdset);
+
+ ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout);
+ if (ret <= 0) {
+ perror("select");
+ break;
+ }
+ if (!FD_ISSET(tun_fd, &fdset))
+ continue;
+
+ len = read(tun_fd, buf, sizeof(buf));
+ if (len <= 0) {
+ if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+ perror("read");
+ break;
+ }
+
+ len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type,
+ is_tap);
+ if (len < 0)
+ continue;
+
+ if (total_len == 0)
+ memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE);
+
+ total_len += len;
+ }
+
+ return total_len;
+}
+
+TEST_F(tun_vnet_udptnl, send_gso_packet)
+{
+ uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ];
+ int r_num_mss = 0;
+ int ret, off;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_gso_packet_into_tun(variant, pkt);
+ ret = write(self->fd, pkt, off);
+ ASSERT_EQ(ret, off);
+
+ ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
+ ASSERT_EQ(ret, variant->data_size);
+ ASSERT_EQ(r_num_mss, variant->r_num_mss);
+}
+
+TEST_F(tun_vnet_udptnl, recv_gso_packet)
+{
+ struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 };
+ struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr;
+ int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
+
+ ret = send_gso_packet_into_tunnel(self, variant);
+ ASSERT_EQ(ret, variant->data_size);
+
+ memset(&vnet_hdr, 0, sizeof(vnet_hdr));
+ ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
+ ASSERT_EQ(ret, variant->data_size);
+
+ if (!variant->no_gso) {
+ ASSERT_EQ(vh->gso_size, variant->gso_size);
+ gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
+ ASSERT_EQ(vh->gso_type, gso_type);
+ }
+}
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h
new file mode 100644
index 000000000000..d6c0437136ec
--- /dev/null
+++ b/tools/testing/selftests/net/tuntap_helpers.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _TUNTAP_HELPERS_H
+#define _TUNTAP_HELPERS_H
+
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/udp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ynl.h>
+
+#include "rt-route-user.h"
+#include "rt-addr-user.h"
+#include "rt-neigh-user.h"
+#include "rt-link-user.h"
+
+#define GENEVE_HLEN 8
+#define PKT_DATA 0xCB
+#define TUNTAP_DEFAULT_TTL 8
+#define TUNTAP_DEFAULT_IPID 1337
+
+unsigned int if_nametoindex(const char *ifname);
+
+static inline int ip_addr_len(int family)
+{
+ return (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr);
+}
+
+static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family,
+ int prefix, int flags, const char *dev)
+{
+ ifam->ifa_family = family;
+ ifam->ifa_prefixlen = prefix;
+ ifam->ifa_index = if_nametoindex(dev);
+ ifam->ifa_flags = flags;
+ ifam->ifa_scope = RT_SCOPE_UNIVERSE;
+}
+
+static inline int ip_addr_add(const char *dev, int family, void *addr,
+ uint8_t prefix)
+{
+ int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ int ifa_flags = IFA_F_PERMANENT | IFA_F_NODAD;
+ int ret = -1, ipalen = ip_addr_len(family);
+ struct rt_addr_newaddr_req *req;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_addr_family, NULL);
+ if (!ys)
+ return -1;
+
+ req = rt_addr_newaddr_req_alloc();
+ if (!req)
+ goto err_req_alloc;
+
+ fill_ifaddr_msg(&req->_hdr, family, prefix, ifa_flags, dev);
+ rt_addr_newaddr_req_set_nlflags(req, nl_flags);
+ rt_addr_newaddr_req_set_local(req, addr, ipalen);
+
+ ret = rt_addr_newaddr(ys, req);
+ rt_addr_newaddr_req_free(req);
+err_req_alloc:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static inline void fill_neigh_req_header(struct ndmsg *ndm, int family,
+ int state, const char *dev)
+{
+ ndm->ndm_family = family;
+ ndm->ndm_ifindex = if_nametoindex(dev);
+ ndm->ndm_state = state;
+ ndm->ndm_flags = 0;
+ ndm->ndm_type = RTN_UNICAST;
+}
+
+static inline int ip_neigh_add(const char *dev, int family, void *addr,
+ unsigned char *lladdr)
+{
+ int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ int ret = -1, ipalen = ip_addr_len(family);
+ struct rt_neigh_newneigh_req *req;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_neigh_family, NULL);
+ if (!ys)
+ return -1;
+
+ req = rt_neigh_newneigh_req_alloc();
+ if (!req)
+ goto err_req_alloc;
+
+ fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev);
+ rt_neigh_newneigh_req_set_nlflags(req, nl_flags);
+ rt_neigh_newneigh_req_set_dst(req, addr, ipalen);
+ rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN);
+ rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev));
+
+ ret = rt_neigh_newneigh(ys, req);
+ rt_neigh_newneigh_req_free(req);
+err_req_alloc:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static inline void fill_route_req_header(struct rtmsg *rtm, int family,
+ int table)
+{
+ rtm->rtm_family = family;
+ rtm->rtm_table = table;
+}
+
+static inline int
+ip_route_get(const char *dev, int family, int table, void *dst,
+ void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out),
+ void *out)
+{
+ int ret = -1, ipalen = ip_addr_len(family);
+ struct rt_route_getroute_req *req;
+ struct rt_route_getroute_rsp *rsp;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_route_family, NULL);
+ if (!ys)
+ return -1;
+
+ req = rt_route_getroute_req_alloc();
+ if (!req)
+ goto err_req_alloc;
+
+ fill_route_req_header(&req->_hdr, family, table);
+ rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST);
+ rt_route_getroute_req_set_dst(req, dst, ipalen);
+ rt_route_getroute_req_set_oif(req, if_nametoindex(dev));
+
+ rsp = rt_route_getroute(ys, req);
+ if (!rsp)
+ goto err_rsp_get;
+
+ ret = 0;
+ if (parse_rsp)
+ parse_rsp(rsp, out);
+
+ rt_route_getroute_rsp_free(rsp);
+err_rsp_get:
+ rt_route_getroute_req_free(req);
+err_req_alloc:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static inline int
+ip_link_add(const char *dev, char *link_type,
+ int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data),
+ void *data)
+{
+ int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ struct rt_link_newlink_req *req;
+ struct ynl_sock *ys;
+ int ret = -1;
+
+ ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+ if (!ys)
+ return -1;
+
+ req = rt_link_newlink_req_alloc();
+ if (!req)
+ goto err_req_alloc;
+
+ req->_hdr.ifi_flags = IFF_UP;
+ rt_link_newlink_req_set_nlflags(req, nl_flags);
+ rt_link_newlink_req_set_ifname(req, dev);
+ rt_link_newlink_req_set_linkinfo_kind(req, link_type);
+
+ if (fill_link_attr && fill_link_attr(req, data) < 0)
+ goto err_attr_fill;
+
+ ret = rt_link_newlink(ys, req);
+err_attr_fill:
+ rt_link_newlink_req_free(req);
+err_req_alloc:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static inline int ip_link_del(const char *dev)
+{
+ struct rt_link_dellink_req *req;
+ struct ynl_sock *ys;
+ int ret = -1;
+
+ ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+ if (!ys)
+ return -1;
+
+ req = rt_link_dellink_req_alloc();
+ if (!req)
+ goto err_req_alloc;
+
+ rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST);
+ rt_link_dellink_req_set_ifname(req, dev);
+
+ ret = rt_link_dellink(ys, req);
+ rt_link_dellink_req_free(req);
+err_req_alloc:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src,
+ unsigned char *dest)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+
+ eth->h_proto = htons(proto);
+ memcpy(eth->h_source, src, ETH_ALEN);
+ memcpy(eth->h_dest, dest, ETH_ALEN);
+
+ return ETH_HLEN;
+}
+
+static inline uint32_t add_csum(const uint8_t *buf, int len)
+{
+ uint16_t *sbuf = (uint16_t *)buf;
+ uint32_t sum = 0;
+
+ while (len > 1) {
+ sum += *sbuf++;
+ len -= 2;
+ }
+
+ if (len)
+ sum += *(uint8_t *)sbuf;
+
+ return sum;
+}
+
+static inline uint16_t finish_ip_csum(uint32_t sum)
+{
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+ return ~((uint16_t)sum);
+}
+
+static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum)
+{
+ sum += add_csum(buf, len);
+ return finish_ip_csum(sum);
+}
+
+static inline int build_ipv4_header(uint8_t *buf, uint8_t proto,
+ int payload_len, struct in_addr *src,
+ struct in_addr *dst)
+{
+ struct iphdr *iph = (struct iphdr *)buf;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = TUNTAP_DEFAULT_TTL;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(TUNTAP_DEFAULT_IPID);
+ iph->protocol = proto;
+ iph->saddr = src->s_addr;
+ iph->daddr = dst->s_addr;
+ iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+
+ return iph->ihl << 2;
+}
+
+static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t)dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static inline int build_ipv6_header(uint8_t *buf, uint8_t proto,
+ uint8_t dsfield, int payload_len,
+ struct in6_addr *src, struct in6_addr *dst)
+{
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)buf;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = TUNTAP_DEFAULT_TTL;
+ ipv6_set_dsfield(ip6h, dsfield);
+ memcpy(&ip6h->saddr, src, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, dst, sizeof(ip6h->daddr));
+
+ return sizeof(struct ipv6hdr);
+}
+
+static inline int build_geneve_header(uint8_t *buf, uint32_t vni)
+{
+ uint16_t protocol = htons(ETH_P_TEB);
+ uint32_t geneve_vni = htonl((vni << 8) & 0xffffff00);
+
+ memcpy(buf + 2, &protocol, 2);
+ memcpy(buf + 4, &geneve_vni, 4);
+ return GENEVE_HLEN;
+}
+
+static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport,
+ int payload_len)
+{
+ struct udphdr *udph = (struct udphdr *)buf;
+
+ udph->source = htons(sport);
+ udph->dest = htons(dport);
+ udph->len = htons(sizeof(*udph) + payload_len);
+ return sizeof(*udph);
+}
+
+static inline void build_udp_packet_csum(uint8_t *buf, int family,
+ bool csum_off)
+{
+ struct udphdr *udph = (struct udphdr *)buf;
+ size_t ipalen = ip_addr_len(family);
+ uint32_t sum;
+
+ /* No extension IPv4 and IPv6 headers addresses are the last fields */
+ sum = add_csum(buf - 2 * ipalen, 2 * ipalen);
+ sum += htons(IPPROTO_UDP) + udph->len;
+
+ if (!csum_off)
+ sum += add_csum(buf, udph->len);
+
+ udph->check = finish_ip_csum(sum);
+}
+
+static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport,
+ int payload_len, int family, bool csum_off)
+{
+ struct udphdr *udph = (struct udphdr *)buf;
+
+ build_udp_header(buf, sport, dport, payload_len);
+ memset(buf + sizeof(*udph), PKT_DATA, payload_len);
+ build_udp_packet_csum(buf, family, csum_off);
+
+ return sizeof(*udph) + payload_len;
+}
+
+static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap,
+ int hdr_len, int gso_size,
+ int outer_family,
+ int inner_family)
+{
+ struct virtio_net_hdr_v1_hash_tunnel *vh_tunnel = (void *)buf;
+ struct virtio_net_hdr_v1 *vh = &vh_tunnel->hash_hdr.hdr;
+ int outer_iphlen, inner_iphlen, eth_hlen, gso_type;
+
+ eth_hlen = is_tap ? ETH_HLEN : 0;
+ outer_iphlen = (outer_family == AF_INET) ? sizeof(struct iphdr) :
+ sizeof(struct ipv6hdr);
+ inner_iphlen = (inner_family == AF_INET) ? sizeof(struct iphdr) :
+ sizeof(struct ipv6hdr);
+
+ vh_tunnel->outer_th_offset = eth_hlen + outer_iphlen;
+ vh_tunnel->inner_nh_offset = vh_tunnel->outer_th_offset + ETH_HLEN +
+ GENEVE_HLEN + sizeof(struct udphdr);
+
+ vh->csum_start = vh_tunnel->inner_nh_offset + inner_iphlen;
+ vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+ vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vh->hdr_len = hdr_len;
+ vh->gso_size = gso_size;
+
+ if (gso_size) {
+ gso_type = outer_family == AF_INET ?
+ VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 :
+ VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 | gso_type;
+ }
+
+ return sizeof(struct virtio_net_hdr_v1_hash_tunnel);
+}
+
+#endif /* _TUNTAP_HELPERS_H */
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index bcc14688661d..170be192f5c7 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -206,12 +206,10 @@ static void __print_timestamp(const char *name, struct timespec *cur,
fprintf(stderr, "\n");
}
-static void print_timestamp_usr(void)
+static void record_timestamp_usr(void)
{
if (clock_gettime(CLOCK_REALTIME, &ts_usr))
error(1, errno, "clock_gettime");
-
- __print_timestamp(" USR", &ts_usr, 0, 0);
}
static void print_timestamp(struct scm_timestamping *tss, int tstype,
@@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt)
fill_header_udp(buf + off, family == PF_INET);
}
- print_timestamp_usr();
-
iov.iov_base = buf;
iov.iov_len = total_len;
@@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt)
}
+ record_timestamp_usr();
+
val = sendmsg(fd, &msg, 0);
if (val != total_len)
error(1, errno, "send");
+ __print_timestamp(" USR", &ts_usr, 0, 0);
+
/* wait for all errors to be queued, else ACKs arrive OOO */
if (cfg_sleep_usec)
usleep(cfg_sleep_usec);