From 4727bab4e9bbeafeff6acdfcb077a7a548cbde30 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 21 Oct 2022 10:58:22 +0800 Subject: net: skb: move skb_pp_recycle() to skbuff.c skb_pp_recycle() is only used by skb_free_head() in skbuff.c, so move it to skbuff.c. Signed-off-by: Yunsheng Lin Acked-by: Ilias Apalodimas Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7be5bb4c94b6..59c9fd55699d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5050,12 +5050,5 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } #endif -static inline bool skb_pp_recycle(struct sk_buff *skb, void *data) -{ - if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) - return false; - return page_pool_return_skb_page(virt_to_page(data)); -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ -- cgit v1.2.3 From 354259fa73e2aac92ae5e19522adb69a92c15b49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Nov 2022 09:57:58 +0000 Subject: net: remove skb->vlan_present skb->vlan_present seems redundant. We can instead derive it from this boolean expression: vlan_present = skb->vlan_proto != 0 || skb->vlan_tci != 0 Add a new union, to access both fields in a single load/store when possible. union { u32 vlan_all; struct { __be16 vlan_proto; __u16 vlan_tci; }; }; This allows the following patch to remove a conditional test in the GRO stack. Note: We move remcsum_offload to keep TC_AT_INGRESS_MASK and SKB_MONO_DELIVERY_TIME_MASK unchanged. 
Signed-off-by: Eric Dumazet Acked-by: Yonghong Song Acked-by: Martin KaFai Lau Signed-off-by: Jakub Kicinski --- arch/sparc/net/bpf_jit_comp_32.c | 10 +++++----- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- include/linux/if_vlan.h | 9 +++------ include/linux/skbuff.h | 18 ++++++++++-------- lib/test_bpf.c | 1 - net/core/filter.c | 22 ++++++++++------------ 6 files changed, 29 insertions(+), 33 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c index b1dbf2fa8c0a..a74e5004c6c8 100644 --- a/arch/sparc/net/bpf_jit_comp_32.c +++ b/arch/sparc/net/bpf_jit_comp_32.c @@ -555,11 +555,11 @@ void bpf_jit_compile(struct bpf_prog *fp) emit_skb_load16(vlan_tci, r_A); break; case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - __emit_skb_load8(__pkt_vlan_present_offset, r_A); - if (PKT_VLAN_PRESENT_BIT) - emit_alu_K(SRL, PKT_VLAN_PRESENT_BIT); - if (PKT_VLAN_PRESENT_BIT < 7) - emit_andi(r_A, 1, r_A); + emit_skb_load32(vlan_all, r_A); + emit_cmpi(r_A, 0); + emit_branch_off(BE, 12); + emit_nop(); + emit_loadimm(1, r_A); break; case BPF_LD | BPF_W | BPF_LEN: emit_skb_load32(len, r_A); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 303930499a4c..c1ea60bc2630 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1973,7 +1973,7 @@ static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, #endif #ifdef CONFIG_DCB - if (!skb->vlan_present) + if (!skb_vlan_tag_present(skb)) goto pick_tx; vlan_prio = skb->vlan_tci >> 13; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e00c4ee81ff7..6864b89ef868 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -76,7 +76,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define skb_vlan_tag_present(__skb) 
((__skb)->vlan_present) +#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) @@ -471,7 +471,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { - skb->vlan_present = 0; + skb->vlan_all = 0; } /** @@ -483,9 +483,7 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { - dst->vlan_present = src->vlan_present; - dst->vlan_proto = src->vlan_proto; - dst->vlan_tci = src->vlan_tci; + dst->vlan_all = src->vlan_all; } /* @@ -519,7 +517,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, { skb->vlan_proto = vlan_proto; skb->vlan_tci = vlan_tci; - skb->vlan_present = 1; } /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 59c9fd55699d..4e464a27adaf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -818,7 +818,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff - * @vlan_present: VLAN tag is present + * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -951,7 +951,7 @@ struct sk_buff { /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ + __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; @@ -966,7 +966,6 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; - __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 
offload_l3_fwd_mark:1; @@ -999,8 +998,13 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; - __be16 vlan_proto; - __u16 vlan_tci; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; @@ -1059,15 +1063,13 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time +/* if you move tc_at_ingress or mono_delivery_time * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 #define TC_AT_INGRESS_MASK (1 << 0) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else -#define PKT_VLAN_PRESENT_BIT 0 #define TC_AT_INGRESS_MASK (1 << 7) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 5820704165a6..ade9ac672adb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14346,7 +14346,6 @@ static struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; - skb->vlan_present = SKB_VLAN_PRESENT; skb->vlan_proto = htons(ETH_P_IP); dev_net_set(&dev, &init_net); skb->dev = &dev; diff --git a/net/core/filter.c b/net/core/filter.c index bb0136e7a8e4..358d5e70671a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -325,11 +325,11 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, offsetof(struct sk_buff, vlan_tci)); break; case SKF_AD_VLAN_TAG_PRESENT: - *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET); - if (PKT_VLAN_PRESENT_BIT) - *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT); - if (PKT_VLAN_PRESENT_BIT < 7) - *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1); + BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_all) != 4); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, vlan_all)); + 
*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); + *insn++ = BPF_ALU32_IMM(BPF_MOV, dst_reg, 1); break; } @@ -9290,13 +9290,11 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, vlan_present): - *target_size = 1; - *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg, - PKT_VLAN_PRESENT_OFFSET); - if (PKT_VLAN_PRESENT_BIT) - *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT); - if (PKT_VLAN_PRESENT_BIT < 7) - *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + bpf_target_off(struct sk_buff, + vlan_all, 4, target_size)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); + *insn++ = BPF_ALU32_IMM(BPF_MOV, si->dst_reg, 1); break; case offsetof(struct __sk_buff, vlan_tci): -- cgit v1.2.3 From ce098da1497c6dee9589fce2c61d1910f4fcf0e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 7 Dec 2022 22:02:59 -0800 Subject: skbuff: Introduce slab_build_skb() syzkaller reported: BUG: KASAN: slab-out-of-bounds in __build_skb_around+0x235/0x340 net/core/skbuff.c:294 Write of size 32 at addr ffff88802aa172c0 by task syz-executor413/5295 For bpf_prog_test_run_skb(), which uses a kmalloc()ed buffer passed to build_skb(). When build_skb() is passed a frag_size of 0, it means the buffer came from kmalloc. In these cases, ksize() is used to find its actual size, but since the allocation may not have been made to that size, actually perform the krealloc() call so that all the associated buffer size checking will be correctly notified (and use the "new" pointer so that compiler hinting works correctly). Split this logic out into a new interface, slab_build_skb(), but leave the original 0 checking for now to catch any stragglers. 
Reported-by: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Link: https://groups.google.com/g/syzkaller-bugs/c/UnIKxTtU5-0/m/-wbXinkgAQAJ Fixes: 38931d8989b5 ("mm: Make ksize() a reporting-only function") Cc: Pavel Begunkov Cc: pepsipu Cc: syzbot+fda18eaa8c12534ccb3b@syzkaller.appspotmail.com Cc: Vlastimil Babka Cc: kasan-dev Cc: Andrii Nakryiko Cc: ast@kernel.org Cc: Daniel Borkmann Cc: Hao Luo Cc: Jesper Dangaard Brouer Cc: John Fastabend Cc: jolsa@kernel.org Cc: KP Singh Cc: martin.lau@linux.dev Cc: Stanislav Fomichev Cc: song@kernel.org Cc: Yonghong Song Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221208060256.give.994-kees@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +- include/linux/skbuff.h | 1 + net/bpf/test_run.c | 2 +- net/core/skbuff.c | 70 +++++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 11 deletions(-) (limited to 'include/linux/skbuff.h') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index dbe310144780..9f473854b0f4 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3045,7 +3045,7 @@ error: dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, DMA_FROM_DEVICE); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); goto error; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ed274f033626..e5116a86cfbc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -200,7 +200,7 @@ static void qed_ll2b_complete_rx_packet(void *cxt, dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, cdev->ll2->rx_size, DMA_FROM_DEVICE); - skb = build_skb(buffer->data, 0); + skb = slab_build_skb(buffer->data); if (!skb) { DP_INFO(cdev, "Failed to build SKB\n"); kfree(buffer->data); diff --git a/include/linux/skbuff.h 
b/include/linux/skbuff.h index 4e464a27adaf..4c8492401a10 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1255,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); +struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6094ef7cffcd..c9bfd263dcef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1128,7 +1128,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } sock_init_data(NULL, sk); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); kfree(ctx); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4bf95e36ed16..3cbba7099c0f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -270,12 +270,10 @@ static struct sk_buff *napi_skb_cache_get(void) return skb; } -/* Caller must provide SKB that is memset cleared */ -static void __build_skb_around(struct sk_buff *skb, void *data, - unsigned int frag_size) +static inline void __finalize_skb_around(struct sk_buff *skb, void *data, + unsigned int size) { struct skb_shared_info *shinfo; - unsigned int size = frag_size ? : ksize(data); size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -297,15 +295,71 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb_set_kcov_handle(skb, kcov_common_handle()); } +static inline void *__slab_build_skb(struct sk_buff *skb, void *data, + unsigned int *size) +{ + void *resized; + + /* Must find the allocation size (and grow it to match). */ + *size = ksize(data); + /* krealloc() will immediately return "data" when + * "ksize(data)" is requested: it is the existing upper + * bounds. As a result, GFP_ATOMIC will be ignored. 
Note + * that this "new" pointer needs to be passed back to the + * caller for use so the __alloc_size hinting will be + * tracked correctly. + */ + resized = krealloc(data, *size, GFP_ATOMIC); + WARN_ON_ONCE(resized != data); + return resized; +} + +/* build_skb() variant which can operate on slab buffers. + * Note that this should be used sparingly as slab buffers + * cannot be combined efficiently by GRO! + */ +struct sk_buff *slab_build_skb(void *data) +{ + struct sk_buff *skb; + unsigned int size; + + skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + data = __slab_build_skb(skb, data, &size); + __finalize_skb_around(skb, data, size); + + return skb; +} +EXPORT_SYMBOL(slab_build_skb); + +/* Caller must provide SKB that is memset cleared */ +static void __build_skb_around(struct sk_buff *skb, void *data, + unsigned int frag_size) +{ + unsigned int size = frag_size; + + /* frag_size == 0 is considered deprecated now. Callers + * using slab buffer should use slab_build_skb() instead. + */ + if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) + data = __slab_build_skb(skb, data, &size); + + __finalize_skb_around(skb, data, size); +} + /** * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator - * or vmalloc() + * skb_shared_info. @data must have been allocated from the page + * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() + * allocation is deprecated, and callers should use slab_build_skb() + * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. 
* Notes : -- cgit v1.2.3