From 58ca14ed98c87cfe0d1408cc65a9745d9e9b7a56 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 12 Aug 2022 13:32:59 +0200 Subject: xsk: Fix corrupted packets for XDP_SHARED_UMEM Fix an issue in XDP_SHARED_UMEM mode together with aligned mode where packets are corrupted for the second and any further sockets bound to the same umem. In other words, this does not affect the first socket bound to the umem. The culprit for this bug is that the initialization of the DMA addresses for the pre-populated xsk buffer pool entries was not performed for any socket but the first one bound to the umem. Only the linear array of DMA addresses was populated. Fix this by populating the DMA addresses in the xsk buffer pool for every socket bound to the same umem. Fixes: 94033cd8e73b8 ("xsk: Optimize for aligned case") Reported-by: Alasdair McWilliam Reported-by: Intrusion Shield Team Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Tested-by: Alasdair McWilliam Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/xdp-newbies/6205E10C-292E-4995-9D10-409649354226@outlook.com/ Link: https://lore.kernel.org/bpf/20220812113259.531-1-magnus.karlsson@gmail.com --- net/xdp/xsk_buff_pool.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index f70112176b7c..a71a8c6edf55 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -379,6 +379,16 @@ static void xp_check_dma_contiguity(struct xsk_dma_map *dma_map) static int xp_init_dma_info(struct xsk_buff_pool *pool, struct xsk_dma_map *dma_map) { + if (!pool->unaligned) { + u32 i; + + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + } + } + pool->dma_pages = kvcalloc(dma_map->dma_pages_cnt, sizeof(*pool->dma_pages), GFP_KERNEL); if (!pool->dma_pages) return -ENOMEM; @@ -428,12 +438,6 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); - else - for (i = 0; i < pool->heads_cnt; i++) { - struct xdp_buff_xsk *xskb = &pool->heads[i]; - - xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); - } err = xp_init_dma_info(pool, dma_map); if (err) { -- cgit v1.2.3 From 583585e48d965338e73e1eb383768d16e0922d73 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 9 Aug 2022 17:49:15 +0800 Subject: skmsg: Fix wrong last sg check in sk_msg_recvmsg() Fix one kernel NULL pointer dereference as below: [ 224.462334] Call Trace: [ 224.462394] __tcp_bpf_recvmsg+0xd3/0x380 [ 224.462441] ? sock_has_perm+0x78/0xa0 [ 224.462463] tcp_bpf_recvmsg+0x12e/0x220 [ 224.462494] inet_recvmsg+0x5b/0xd0 [ 224.462534] __sys_recvfrom+0xc8/0x130 [ 224.462574] ? syscall_trace_enter+0x1df/0x2e0 [ 224.462606] ? __do_page_fault+0x2de/0x500 [ 224.462635] __x64_sys_recvfrom+0x24/0x30 [ 224.462660] do_syscall_64+0x5d/0x1d0 [ 224.462709] entry_SYSCALL_64_after_hwframe+0x65/0xca In commit 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()"), we change last sg check to sg_is_last(), but in sockmap redirection case (without stream_parser/stream_verdict/ skb_verdict), we did not mark the end of the scatterlist. Check the sk_msg_alloc, sk_msg_page_add, and bpf_msg_push_data functions, they all do not mark the end of sg. They are expected to use sg.end for end judgment. So the judgment of '(i != msg_rx->sg.end)' is added back here. Fixes: 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20220809094915.150391-1-liujian56@huawei.com --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index f47338d89d5d..5be4485ff07d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -461,7 +461,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (!sg_is_last(sge)); + } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -471,7 +471,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && sg_is_last(sge)) { + if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } -- cgit v1.2.3 From 7ec9fce4b31604f8415136a4c07f7dc8ad431aec Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Thu, 18 Aug 2022 10:41:18 +0300 Subject: ip_tunnel: Respect tunnel key's "flow_flags" in IP tunnels Commit 451ef36bd229 ("ip_tunnels: Add new flow flags field to ip_tunnel_key") added a "flow_flags" member to struct ip_tunnel_key which was later used by the commit in the fixes tag to avoid dropping packets with sources that aren't locally configured when set in bpf_set_tunnel_key(). VXLAN and GENEVE were made to respect this flag, ip tunnels like IPIP and GRE were not. This commit fixes this omission by making ip_tunnel_init_flow() receive the flow flags from the tunnel key in the relevant collect_md paths. Fixes: b8fff748521c ("bpf: Set flow flag to allow any source IP in bpf_tunnel_key") Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann Reviewed-by: Paul Chaignon Link: https://lore.kernel.org/bpf/20220818074118.726639-1-eyal.birger@gmail.com --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 3 ++- include/net/ip_tunnels.h | 4 +++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_tunnel.c | 7 ++++--- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 39904dacf4f0..b3472fb94617 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,7 +423,8 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0); + 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, + 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 63fac94f9ace..ced80e2f8b58 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -246,7 +246,8 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, __be32 daddr, __be32 saddr, __be32 key, __u8 tos, struct net *net, int oif, - __u32 mark, __u32 tun_inner_hash) + __u32 mark, __u32 tun_inner_hash, + __u8 flow_flags) { memset(fl4, 0, sizeof(*fl4)); @@ -263,6 +264,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4, fl4->fl4_gre_key = key; fl4->flowi4_mark = mark; fl4->flowi4_multipath_hash = tun_inner_hash; + fl4->flowi4_flags = flow_flags; } int ip_tunnel_init(struct net_device *dev); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5c58e21f724e..f866d6282b2b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -609,7 +609,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), key->tos & ~INET_ECN_MASK, dev_net(dev), 0, - skb->mark, skb_get_hash(skb)); + skb->mark, skb_get_hash(skb), key->flow_flags); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e65e948cab9f..019f3b0839c5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -295,7 +295,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, RT_TOS(iph->tos), dev_net(dev), - tunnel->parms.link, tunnel->fwmark, 0); + tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -570,7 +570,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), - dev_net(dev), 0, skb->mark, skb_get_hash(skb)); + dev_net(dev), 0, skb->mark, skb_get_hash(skb), + key->flow_flags); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; @@ -729,7 +730,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), dev_net(dev), tunnel->parms.link, - tunnel->fwmark, skb_get_hash(skb)); + tunnel->fwmark, skb_get_hash(skb), 0); if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; -- cgit v1.2.3