author    Jakub Kicinski <kuba@kernel.org>    2025-11-18 18:25:48 -0800
committer Jakub Kicinski <kuba@kernel.org>    2025-11-18 18:25:49 -0800
commit    616d8604392e25223fc2c0043c4744d4688b4890 (patch)
tree      dc0c9b7fd978032be974648c29329e2f3b865d5c
parent    eb74ae2f87d254e54ab15429e845c2c46f8e970f (diff)
parent    21664814b89e1268bc48e9f641b813746a7dbaae (diff)
Merge branch 'net-expand-napi_skb_cache-use'
Eric Dumazet says:

====================
net: expand napi_skb_cache use

This is a follow-up to commit e20dfbad8aab ("net: fix napi_consume_skb()
with alien skbs").

Now that the per-cpu napi_skb_cache is populated from the TX completion
path, we can make use of this cache, especially for cpus not used by a
driver NAPI poll (the primary user of napi_skb_cache).

With this series, I consistently reach 130 Mpps on my UDP tx stress test
and reduce SLUB spinlock contention to smaller values.
====================

Link: https://patch.msgid.link/20251116202717.1542829-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--    net/core/skbuff.c    48
1 file changed, 31 insertions(+), 17 deletions(-)
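To make the diff easier to follow, here is a minimal userspace analogue of the pattern this series builds on: a per-thread (standing in for per-CPU) stack of preallocated objects that is bulk-refilled only when the caller opts into allocation. Everything in this sketch (cache_get(), CACHE_BULK, struct obj) is illustrative and not part of the patch; the kernel code instead uses kmem_cache_alloc_bulk() on net_hotdata.skbuff_cache under local_lock_nested_bh().

/*
 * Illustrative userspace analogue, NOT kernel code: a thread-local
 * stack of preallocated objects, refilled in bulk only when "alloc"
 * is true, mirroring napi_skb_cache_get(bool alloc) below.
 */
#include <stdbool.h>
#include <stdlib.h>

#define CACHE_BULK 16			/* plays the role of NAPI_SKB_CACHE_BULK */

struct obj { char payload[256]; };	/* stand-in for struct sk_buff */

static _Thread_local struct obj *cache[CACHE_BULK];
static _Thread_local unsigned int cache_count;

static struct obj *cache_get(bool alloc)
{
	unsigned int i;

	if (cache_count == 0) {
		if (!alloc)
			return NULL;	/* opportunistic path: never refill */
		for (i = 0; i < CACHE_BULK; i++) {
			cache[i] = malloc(sizeof(struct obj));
			if (!cache[i])
				break;
		}
		cache_count = i;
		if (cache_count == 0)
			return NULL;
	}
	return cache[--cache_count];
}

The alloc=false mode is the key addition: a caller that already has a cheap fallback (kmem_cache_alloc_node() in __alloc_skb()) can opportunistically drain skbs parked in the cache by the TX completion path without ever paying for a bulk refill.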
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f34372666e67..9feea830a4db 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -280,17 +280,18 @@ EXPORT_SYMBOL(__netdev_alloc_frag_align);
*/
static u32 skbuff_cache_size __read_mostly;
-static struct sk_buff *napi_skb_cache_get(void)
+static struct sk_buff *napi_skb_cache_get(bool alloc)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC | __GFP_NOWARN,
- NAPI_SKB_CACHE_BULK,
- nc->skb_cache);
+ if (alloc)
+ nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN,
+ NAPI_SKB_CACHE_BULK,
+ nc->skb_cache);
if (unlikely(!nc->skb_count)) {
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return NULL;
@@ -530,7 +531,7 @@ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = napi_skb_cache_get();
+ skb = napi_skb_cache_get(true);
if (unlikely(!skb))
return NULL;
@@ -645,25 +646,38 @@ out:
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
int flags, int node)
{
+ struct sk_buff *skb = NULL;
struct kmem_cache *cache;
- struct sk_buff *skb;
bool pfmemalloc;
u8 *data;
- cache = (flags & SKB_ALLOC_FCLONE)
- ? net_hotdata.skbuff_fclone_cache : net_hotdata.skbuff_cache;
-
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
- /* Get the HEAD */
- if ((flags & (SKB_ALLOC_FCLONE | SKB_ALLOC_NAPI)) == SKB_ALLOC_NAPI &&
- likely(node == NUMA_NO_NODE || node == numa_mem_id()))
- skb = napi_skb_cache_get();
- else
+ if (flags & SKB_ALLOC_FCLONE) {
+ cache = net_hotdata.skbuff_fclone_cache;
+ goto fallback;
+ }
+ cache = net_hotdata.skbuff_cache;
+ if (unlikely(node != NUMA_NO_NODE && node != numa_mem_id()))
+ goto fallback;
+
+ if (flags & SKB_ALLOC_NAPI) {
+ skb = napi_skb_cache_get(true);
+ if (unlikely(!skb))
+ return NULL;
+ } else if (!in_hardirq() && !irqs_disabled()) {
+ local_bh_disable();
+ skb = napi_skb_cache_get(false);
+ local_bh_enable();
+ }
+
+ if (!skb) {
+fallback:
skb = kmem_cache_alloc_node(cache, gfp_mask & ~GFP_DMA, node);
- if (unlikely(!skb))
- return NULL;
+ if (unlikely(!skb))
+ return NULL;
+ }
prefetchw(skb);
/* We do our best to align skb_shared_info on a separate cache
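For quick reference, the head-allocation decision flow that the reworked __alloc_skb() implements can be condensed as follows (a summary sketch derived from the hunk above, not part of the patch):

/*
 * __alloc_skb() head allocation, condensed:
 *
 *   SKB_ALLOC_FCLONE               -> kmem_cache_alloc_node(skbuff_fclone_cache)
 *   node is remote (not local/ANY) -> kmem_cache_alloc_node(skbuff_cache)
 *   SKB_ALLOC_NAPI                 -> napi_skb_cache_get(true), NULL on failure
 *   !in_hardirq() && !irqs_disabled()
 *                                  -> napi_skb_cache_get(false) under
 *                                     local_bh_disable()/local_bh_enable(),
 *                                     falling back to kmem_cache_alloc_node()
 *   otherwise (hardirq/irqs off)   -> kmem_cache_alloc_node(skbuff_cache)
 */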