Diffstat (limited to 'net/core/skbuff.c'):
 net/core/skbuff.c | 166 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 113 insertions(+), 53 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 61746c2b95f6..699c401a5eae 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -78,6 +78,7 @@
#include <net/mpls.h>
#include <net/mptcp.h>
#include <net/mctp.h>
+#include <net/can.h>
#include <net/page_pool/helpers.h>
#include <net/psp/types.h>
#include <net/dropreason.h>
@@ -280,7 +281,7 @@ EXPORT_SYMBOL(__netdev_alloc_frag_align);
*/
static u32 skbuff_cache_size __read_mostly;
-static struct sk_buff *napi_skb_cache_get(bool alloc)
+static inline struct sk_buff *napi_skb_cache_get(bool alloc)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
@@ -307,6 +308,23 @@ static struct sk_buff *napi_skb_cache_get(bool alloc)
return skb;
}
+/*
+ * Only clear those fields we need to clear, not those that we will
+ * actually initialise later. Hence, don't put any more fields after
+ * the tail pointer in struct sk_buff!
+ */
+static inline void skbuff_clear(struct sk_buff *skb)
+{
+ /* Replace memset(skb, 0, offsetof(struct sk_buff, tail))
+ * with two smaller memset() calls separated by a barrier(),
+ * so the compiler expands both of them inline instead of
+ * emitting one large out-of-line memset().
+ */
+ BUILD_BUG_ON(offsetof(struct sk_buff, tail) <= 128);
+ memset(skb, 0, 128);
+ barrier();
+ memset((void *)skb + 128, 0, offsetof(struct sk_buff, tail) - 128);
+}
+
/**
* napi_skb_cache_get_bulk - obtain a number of zeroed skb heads from the cache
* @skbs: pointer to an at least @n-sized array to fill with skb pointers
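
For reference, here is the split-clear pattern in isolation, as a minimal standalone sketch (generic struct and a userspace barrier(), not the kernel code itself); whether the compiler really expands both clears inline is compiler and size dependent:

#include <stddef.h>
#include <string.h>

#define barrier() __asm__ __volatile__("" ::: "memory")

struct example {
	unsigned char meta[192];	/* fields that must start out zeroed */
	unsigned char *tail;		/* initialised later by the caller */
	unsigned char *end;
};

/* Clear everything up to (but not including) ->tail in two steps, with a
 * barrier() between them so the two clears are not merged back into one
 * large memset() call.
 */
static inline void example_clear(struct example *p)
{
	_Static_assert(offsetof(struct example, tail) > 128,
		       "the split assumes more than 128 bytes to clear");
	memset(p, 0, 128);
	barrier();
	memset((char *)p + 128, 0, offsetof(struct example, tail) - 128);
}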
@@ -357,7 +375,7 @@ get:
skbs[i] = nc->skb_cache[base + i];
kasan_mempool_unpoison_object(skbs[i], skbuff_cache_size);
- memset(skbs[i], 0, offsetof(struct sk_buff, tail));
+ skbuff_clear(skbs[i]);
}
nc->skb_count -= n;
@@ -424,7 +442,7 @@ struct sk_buff *slab_build_skb(void *data)
if (unlikely(!skb))
return NULL;
- memset(skb, 0, offsetof(struct sk_buff, tail));
+ skbuff_clear(skb);
data = __slab_build_skb(data, &size);
__finalize_skb_around(skb, data, size);
@@ -476,7 +494,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
if (unlikely(!skb))
return NULL;
- memset(skb, 0, offsetof(struct sk_buff, tail));
+ skbuff_clear(skb);
__build_skb_around(skb, data, frag_size);
return skb;
@@ -537,7 +555,7 @@ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
if (unlikely(!skb))
return NULL;
- memset(skb, 0, offsetof(struct sk_buff, tail));
+ skbuff_clear(skb);
__build_skb_around(skb, data, frag_size);
return skb;
@@ -566,6 +584,16 @@ struct sk_buff *napi_build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(napi_build_skb);
+static void *kmalloc_pfmemalloc(size_t obj_size, gfp_t flags, int node)
+{
+ if (!gfp_pfmemalloc_allowed(flags))
+ return NULL;
+ if (!obj_size)
+ return kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
+ flags, node);
+ return kmalloc_node_track_caller(obj_size, flags, node);
+}
+
/*
* kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
* the caller if emergency pfmemalloc reserves are being used. If it is and
@@ -574,9 +602,8 @@ EXPORT_SYMBOL(napi_build_skb);
* memory is free
*/
static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
- bool *pfmemalloc)
+ struct sk_buff *skb)
{
- bool ret_pfmemalloc = false;
size_t obj_size;
void *obj;
@@ -587,12 +614,12 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
*size = SKB_SMALL_HEAD_CACHE_SIZE;
- if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ if (likely(obj))
goto out;
/* Try again but now we are using pfmemalloc reserves */
- ret_pfmemalloc = true;
- obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache, flags, node);
- goto out;
+ if (skb)
+ skb->pfmemalloc = true;
+ return kmalloc_pfmemalloc(0, flags, node);
}
obj_size = kmalloc_size_roundup(obj_size);
@@ -608,17 +635,14 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmalloc_node_track_caller(obj_size,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
- if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ if (likely(obj))
goto out;
/* Try again but now we are using pfmemalloc reserves */
- ret_pfmemalloc = true;
- obj = kmalloc_node_track_caller(obj_size, flags, node);
-
+ if (skb)
+ skb->pfmemalloc = true;
+ obj = kmalloc_pfmemalloc(obj_size, flags, node);
out:
- if (pfmemalloc)
- *pfmemalloc = ret_pfmemalloc;
-
return obj;
}
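
The kmalloc_reserve() rework above boils down to a two-pass strategy: first try without touching the emergency reserves (__GFP_NOMEMALLOC | __GFP_NOWARN), and only mark the skb and retry via kmalloc_pfmemalloc() when that fails. A minimal userspace analogue of that control flow (the allocator stand-ins are hypothetical, not kernel APIs):

#include <stdbool.h>
#include <stdlib.h>

struct buf {
	bool from_reserve;	/* analogue of skb->pfmemalloc */
	void *head;
};

/* Stand-in for the first attempt, which must not dip into reserves. */
static void *alloc_no_reserve(size_t size)
{
	return malloc(size);
}

/* Stand-in for the last-resort attempt that may use reserves. */
static void *alloc_from_reserve(size_t size)
{
	return malloc(size);
}

/* Mirror of the new flow: only the retry marks the owner, so in this
 * sketch the flag is set exactly when the reserve path was taken.
 */
static void *reserve_alloc(size_t size, struct buf *owner)
{
	void *p = alloc_no_reserve(size);

	if (p)
		return p;
	if (owner)
		owner->from_reserve = true;
	return alloc_from_reserve(size);
}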
@@ -650,7 +674,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct sk_buff *skb = NULL;
struct kmem_cache *cache;
- bool pfmemalloc;
u8 *data;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
@@ -680,37 +703,35 @@ fallback:
if (unlikely(!skb))
return NULL;
}
- prefetchw(skb);
+ skbuff_clear(skb);
/* We do our best to align skb_shared_info on a separate cache
* line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
- data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
+ data = kmalloc_reserve(&size, gfp_mask, node, skb);
if (unlikely(!data))
goto nodata;
/* kmalloc_size_roundup() might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- prefetchw(data + SKB_WITH_OVERHEAD(size));
-
- /*
- * Only clear those fields we need to clear, not those that we will
- * actually initialise below. Hence, don't put any more fields after
- * the tail pointer in struct sk_buff!
- */
- memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, size);
- skb->pfmemalloc = pfmemalloc;
+ __finalize_skb_around(skb, data, size);
if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff_fclones *fclones;
fclones = container_of(skb, struct sk_buff_fclones, skb1);
- skb->fclone = SKB_FCLONE_ORIG;
+ /* skb->fclone is a 2-bit field.  Since skbuff_clear() just
+ * zeroed it, replace the more expensive read-modify-write
+ * (skb->fclone = SKB_FCLONE_ORIG) with a single OR.
+ */
+ BUILD_BUG_ON(SKB_FCLONE_UNAVAILABLE != 0);
+ DEBUG_NET_WARN_ON_ONCE(skb->fclone != SKB_FCLONE_UNAVAILABLE);
+ skb->fclone |= SKB_FCLONE_ORIG;
+
refcount_set(&fclones->fclone_ref, 1);
}
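
The fclone change relies on two facts: the whole head of the skb was just zeroed by skbuff_clear(), and the "unavailable" encoding is 0. Under those conditions a 2-bit bitfield can be set with a plain OR on the containing byte instead of the mask-and-merge a normal assignment needs. A tiny standalone illustration with made-up names:

#include <string.h>

enum { STATE_UNSET = 0, STATE_ORIG = 1, STATE_CLONE = 2 };

struct hdr {
	unsigned char flag_a:1;
	unsigned char state:2;	/* analogue of skb->fclone */
	unsigned char flag_b:1;
};

static void hdr_init_orig(struct hdr *h)
{
	memset(h, 0, sizeof(*h));
	/* Valid only because STATE_UNSET is 0 and the byte holding the
	 * bitfield was just cleared: the compiler can emit a single OR
	 * instead of load/clear-bits/merge/store.
	 */
	_Static_assert(STATE_UNSET == 0, "OR trick needs the zero encoding");
	h->state |= STATE_ORIG;
}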
@@ -1488,9 +1509,20 @@ void napi_skb_free_stolen_head(struct sk_buff *skb)
napi_skb_cache_put(skb);
}
+/**
+ * napi_consume_skb() - consume skb in NAPI context, try to feed skb cache
+ * @skb: buffer to free
+ * @budget: NAPI budget
+ *
+ * A non-zero @budget must come from the @budget argument passed by the core
+ * to a NAPI poll function. Note that the core may pass a budget of 0 to NAPI
+ * poll, for example when polling for netpoll / netconsole.
+ *
+ * Passing a @budget of 0 is safe from any context; it turns this function
+ * into dev_consume_skb_any().
+ */
void napi_consume_skb(struct sk_buff *skb, int budget)
{
- /* Zero budget indicate non-NAPI context called us, like netpoll */
if (unlikely(!budget || !skb)) {
dev_consume_skb_any(skb);
return;
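
A hedged sketch of the caller side the new kernel-doc describes: a hypothetical driver poll function (the mydrv_* names are made up) threading its @budget straight through to napi_consume_skb() for TX completions. Only napi_consume_skb(), napi_complete_done() and container_of() are taken from the real API:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct mydrv_ring {
	struct napi_struct napi;
	/* ... ring state ... */
};

/* Stub: a real driver would walk its TX completion ring here. */
static struct sk_buff *mydrv_next_completed_tx(struct mydrv_ring *r)
{
	return NULL;
}

/* Stub: RX processing, bounded by @budget. */
static int mydrv_clean_rx(struct mydrv_ring *r, int budget)
{
	return 0;
}

static int mydrv_poll(struct napi_struct *napi, int budget)
{
	struct mydrv_ring *r = container_of(napi, struct mydrv_ring, napi);
	struct sk_buff *skb;
	int work;

	/* TX completions: pass the poll budget through so freed heads can
	 * refill the per-CPU NAPI skb cache.  With a budget of 0 (netpoll)
	 * this degrades to dev_consume_skb_any(), which is safe anywhere.
	 */
	while ((skb = mydrv_next_completed_tx(r)))
		napi_consume_skb(skb, budget);

	work = mydrv_clean_rx(r, budget);
	if (work < budget)
		napi_complete_done(napi, work);
	return work;
}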
@@ -5108,6 +5140,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_INET_PSP)
[SKB_EXT_PSP] = SKB_EXT_CHUNKSIZEOF(struct psp_skb_ext),
#endif
+#if IS_ENABLED(CONFIG_CAN)
+ [SKB_EXT_CAN] = SKB_EXT_CHUNKSIZEOF(struct can_skb_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -5123,7 +5158,7 @@ static __always_inline unsigned int skb_ext_total_length(void)
static void skb_extensions_init(void)
{
- BUILD_BUG_ON(SKB_EXT_NUM >= 8);
+ BUILD_BUG_ON(SKB_EXT_NUM > 8);
#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL)
BUILD_BUG_ON(skb_ext_total_length() > 255);
#endif
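
For context, the new SKB_EXT_CAN entry is consumed through the generic skb extension API already used for the other entries in skb_ext_type_len[]. A short, hedged sketch (the layout of struct can_skb_ext is not visible in this hunk, so only presence and allocation are shown):

#include <linux/skbuff.h>
#include <net/can.h>

/* Return the CAN extension attached to @skb, allocating one if needed.
 * skb_ext_add() reserves per-skb storage of the size registered in
 * skb_ext_type_len[] and returns NULL if that allocation fails.
 */
static struct can_skb_ext *can_ext_get_or_add(struct sk_buff *skb)
{
	struct can_skb_ext *ext = skb_ext_find(skb, SKB_EXT_CAN);

	if (ext)
		return ext;
	return skb_ext_add(skb, SKB_EXT_CAN);
}

The reader side would call skb_ext_find() alone and treat a NULL return as "no CAN metadata attached".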
@@ -7392,31 +7427,56 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
-void get_netmem(netmem_ref netmem)
+void __get_netmem(netmem_ref netmem)
{
- struct net_iov *niov;
+ struct net_iov *niov = netmem_to_net_iov(netmem);
- if (netmem_is_net_iov(netmem)) {
- niov = netmem_to_net_iov(netmem);
- if (net_is_devmem_iov(niov))
- net_devmem_get_net_iov(netmem_to_net_iov(netmem));
- return;
- }
- get_page(netmem_to_page(netmem));
+ if (net_is_devmem_iov(niov))
+ net_devmem_get_net_iov(netmem_to_net_iov(netmem));
}
-EXPORT_SYMBOL(get_netmem);
+EXPORT_SYMBOL(__get_netmem);
-void put_netmem(netmem_ref netmem)
+void __put_netmem(netmem_ref netmem)
{
- struct net_iov *niov;
+ struct net_iov *niov = netmem_to_net_iov(netmem);
- if (netmem_is_net_iov(netmem)) {
- niov = netmem_to_net_iov(netmem);
- if (net_is_devmem_iov(niov))
- net_devmem_put_net_iov(netmem_to_net_iov(netmem));
- return;
+ if (net_is_devmem_iov(niov))
+ net_devmem_put_net_iov(netmem_to_net_iov(netmem));
+}
+EXPORT_SYMBOL(__put_netmem);
+
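
The inline wrappers that keep the common page case out of this file are not part of this hunk, so the following is only a guess at their shape: a header-side sketch assuming get_netmem()/put_netmem() become inline and call the new __get_netmem()/__put_netmem() only for net_iov (all helpers used here appear in the removed code above):

#include <linux/mm.h>
#include <net/netmem.h>

static inline void get_netmem(netmem_ref netmem)
{
	if (netmem_is_net_iov(netmem)) {
		__get_netmem(netmem);
		return;
	}
	get_page(netmem_to_page(netmem));
}

static inline void put_netmem(netmem_ref netmem)
{
	if (netmem_is_net_iov(netmem)) {
		__put_netmem(netmem);
		return;
	}
	put_page(netmem_to_page(netmem));
}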
+struct vlan_type_depth __vlan_get_protocol_offset(const struct sk_buff *skb,
+ __be16 type,
+ int mac_offset)
+{
+ unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH;
+
+ /* if type is 802.1Q/AD then the header should already be
+ * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
+ * ETH_HLEN otherwise
+ */
+ if (vlan_depth) {
+ if (WARN_ON_ONCE(vlan_depth < VLAN_HLEN))
+ return (struct vlan_type_depth) { 0 };
+ vlan_depth -= VLAN_HLEN;
+ } else {
+ vlan_depth = ETH_HLEN;
}
+ do {
+ struct vlan_hdr vhdr, *vh;
+
+ vh = skb_header_pointer(skb, mac_offset + vlan_depth,
+ sizeof(vhdr), &vhdr);
+ if (unlikely(!vh || !--parse_depth))
+ return (struct vlan_type_depth) { 0 };
- put_page(netmem_to_page(netmem));
+ type = vh->h_vlan_encapsulated_proto;
+ vlan_depth += VLAN_HLEN;
+ } while (eth_type_vlan(type));
+
+ return (struct vlan_type_depth) {
+ .type = type,
+ .depth = vlan_depth
+ };
}
-EXPORT_SYMBOL(put_netmem);
+EXPORT_SYMBOL(__vlan_get_protocol_offset);
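
A possible caller of the new helper, sketched under hedged assumptions: struct vlan_type_depth is taken to be declared alongside the prototype (header not shown here), and the all-zero return above is treated as the failure case. skb_mac_offset() and eth_type_vlan() are existing helpers:

#include <linux/if_vlan.h>
#include <linux/skbuff.h>

/* Resolve the protocol carried behind any VLAN tags that start at the
 * MAC header; falls back to skb->protocol for untagged frames and to 0
 * when the helper could not parse the tags.
 */
static __be16 example_encap_proto(const struct sk_buff *skb)
{
	struct vlan_type_depth vtd;

	if (!eth_type_vlan(skb->protocol))
		return skb->protocol;

	vtd = __vlan_get_protocol_offset(skb, skb->protocol,
					 skb_mac_offset(skb));
	return vtd.depth ? vtd.type : 0;
}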