diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-02-10 20:25:38 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-02-10 20:25:38 -0800 |
| commit | 792aaea994537daa78f31a86c948ccbefa8f4706 (patch) | |
| tree | 4f935d26b3edcd681831cc822d965a203dcd3a19 /tools/testing | |
| parent | 3a4687366148a58017997a750f85631292c22b3e (diff) | |
| parent | 648946966a08e4cb1a71619e3d1b12bd7642de7b (diff) | |
Merge tag 'nf-next-26-02-06' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Florian Westphal says:
====================
netfilter: updates for net-next
The following patchset contains Netfilter updates for *net-next*:
1) Fix net-next-only use-after-free bug in nf_tables rbtree set:
Expired elements cannot be released right away after unlink anymore
because there is no guarantee that the binary-search blob is going to
be updated. Spotted by syzkaller.
2) Fix esoteric bug in nf_queue with udp fraglist gro, broken since
6.11. Patch 3 adds extends the nfqueue selftest for this.
4) Use dedicated slab for flowtable entries, currently the -512 cache
is used, which is wasteful. From Qingfang Deng.
5) Recent net-next update extended existing test for ip6ip6 tunnels, add
the required /config entry. Test still passed by accident because the
previous tests network setup gets re-used, so also update the test so
it will fail in case the ip6ip6 tunnel interface cannot be added.
6) Fix 'nft get element mytable myset { 1.2.3.4 }' on big endian
platforms, this was broken since code was added in v5.1.
7) Fix nf_tables counter reset support on 32bit platforms, where counter
reset may cause huge values to appear due to wraparound.
Broken since reset feature was added in v6.11. From Anders Grahn.
8-11) update nf_tables rbtree set type to detect partial
operlaps. This will eventually speed up nftables userspace: at this
time userspace does a netlink dump of the set content which slows down
incremental updates on interval sets. From Pablo Neira Ayuso.
* tag 'nf-next-26-02-06' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
netfilter: nft_set_rbtree: validate open interval overlap
netfilter: nft_set_rbtree: validate element belonging to interval
netfilter: nft_set_rbtree: check for partial overlaps in anonymous sets
netfilter: nft_set_rbtree: fix bogus EEXIST with NLM_F_CREATE with null interval
netfilter: nft_counter: fix reset of counters on 32bit archs
netfilter: nft_set_hash: fix get operation on big endian
selftests: netfilter: add IPV6_TUNNEL to config
netfilter: flowtable: dedicated slab for flow entry
selftests: netfilter: nft_queue.sh: add udp fraglist gro test case
netfilter: nfnetlink_queue: do shared-unconfirmed check before segmentation
netfilter: nft_set_rbtree: don't gc elements on insert
====================
Link: https://patch.msgid.link/20260206153048.17570-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools/testing')
| -rw-r--r-- | tools/testing/selftests/net/netfilter/config | 1 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/netfilter/nft_flowtable.sh | 19 | ||||
| -rwxr-xr-x | tools/testing/selftests/net/netfilter/nft_queue.sh | 142 |
3 files changed, 150 insertions, 12 deletions
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 12ce61fa15a8..979cff56e1f5 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -29,6 +29,7 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_SCTP=m CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_TUNNEL=m CONFIG_IP_VS=m CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_RR=m diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index 14d7f67715ed..7a34ef468975 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -601,14 +601,19 @@ ip -net "$nsr2" link set tun0 up ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null -ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 +ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1 ip -net "$nsr2" link set tun6 up ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad ip -net "$nsr1" route change default via 192.168.100.2 ip -net "$nsr2" route change default via 192.168.100.1 -ip -6 -net "$nsr1" route change default via fee1:3::2 -ip -6 -net "$nsr2" route change default via fee1:3::1 + +# do not use "route change" and delete old default so +# socat fails to connect in case new default can't be added. +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:3::2 +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:3::1 ip -net "$ns2" route add default via 10.0.2.1 ip -6 -net "$ns2" route add default via dead:2::1 @@ -649,7 +654,8 @@ ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 a ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2 ip -net "$nsr1" link set tun6.10 up ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad -ip -6 -net "$nsr1" route change default via fee1:5::2 +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:5::2 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept' ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10 @@ -664,10 +670,11 @@ ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10 ip -net "$nsr2" route change default via 192.168.200.1 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null -ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 +ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1 ip -net "$nsr2" link set tun6.10 up ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad -ip -6 -net "$nsr2" route change default via fee1:5::1 +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:5::1 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2 diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 6136ceec45e0..139bc1211878 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -510,7 +510,7 @@ EOF udp_listener_ready() { - ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 + ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2" } output_files_written() @@ -518,7 +518,7 @@ output_files_written() test -s "$1" && test -s "$2" } -test_udp_ct_race() +test_udp_nat_race() { ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF flush ruleset @@ -545,8 +545,8 @@ EOF ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & local nfqpid=$! - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 # Send two packets, one should end up in ns1, other in ns2. @@ -557,7 +557,7 @@ EOF busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" - kill "$nfqpid" + kill "$nfqpid" "$rpid1" "$rpid2" if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then echo "FAIL: Expected One udp conntrack entry" @@ -585,6 +585,135 @@ EOF echo "PASS: both udp receivers got one packet each" } +# Make sure UDPGRO aggregated packets don't lose +# their skb->nfct entry when nfqueue passes the +# skb to userspace with software gso segmentation on. +test_udp_gro_ct() +{ + local errprefix="FAIL: test_udp_gro_ct:" + + ip netns exec "$nsrouter" conntrack -F 2>/dev/null + + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + # Number of packets/bytes queued to userspace + counter toqueue { } + # Number of packets/bytes reinjected from userspace with 'ct new' intact + counter fromqueue { } + # These two counters should be identical and not 0. + + chain prerouting { + type filter hook prerouting priority -300; policy accept; + + # userspace sends small packets, if < 1000, UDPGRO did + # not kick in, but test needs a 'new' conntrack with udpgro skb. + meta iifname veth0 meta l4proto udp meta length > 1000 accept + + # don't pick up non-gso packets and don't queue them to + # userspace. + notrack + } + + chain postrouting { + type filter hook postrouting priority 0; policy accept; + + # Only queue unconfirmed fraglist gro skbs to userspace. + udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1 + } + + chain validate { + type filter hook postrouting priority 1; policy accept; + # ... and only count those that were reinjected with the + # skb->nfct intact. + mark 1 counter name "fromqueue" + } +} +EOF + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" & + local nfqpid=$! + + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346 + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1 + + local bs=512 + local count=$(((32 * 1024 * 1024) / bs)) + dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do + timeout 5 ip netns exec "$ns1" \ + socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 & + done + + busywait 10000 test -s "$TMPFILE1" + + kill "$rpid" + + wait + + local p + local b + local pqueued + local bqueued + + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets) + read p pqueued b bqueued <<EOF +$c +EOF + local preinject + local breinject + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets) + read p preinject b breinject <<EOF +$c +EOF + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off + ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off + + if [ "$pqueued" -eq 0 ];then + # happens when gro did not build at least on aggregate + echo "SKIP: No packets were queued" + return + fi + + local saw_ct_entry=0 + if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then + saw_ct_entry=1 + else + echo "$errprefix Expected udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L + ret=1 + fi + + if [ "$pqueued" -ge "$preinject" ] ;then + echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject" + ret=1 + return + fi + + # sw segmentation adds extra udp and ip headers. + local breinject_expect=$((preinject * (512 + 20 + 8))) + + if [ "$breinject" -eq "$breinject_expect" ]; then + if [ "$saw_ct_entry" -eq 1 ];then + echo "PASS: fraglist gro skb passed with conntrack entry" + else + echo "$errprefix fraglist gro skb passed without conntrack entry" + ret=1 + fi + else + echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect" + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "$errprefix: Could not delete udpq table" + ret=1 + fi +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -663,7 +792,8 @@ test_tcp_localhost_connectclose test_tcp_localhost_requeue test_sctp_forward test_sctp_output -test_udp_ct_race +test_udp_nat_race +test_udp_gro_ct # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf |
