author		Jason Xing <kernelxing@tencent.com>	2026-01-04 09:21:25 +0800
committer	Paolo Abeni <pabeni@redhat.com>	2026-01-15 10:07:45 +0100
commit		a2cb2e23b2bcc5e376a7aa63964e04a5b059d7a1 (patch)
tree		858293f26809e20f182270f851898b025ccb7746
parent		cee715d907d0f93411542f19a4eb9161450e782b (diff)
xsk: move cq_cached_prod_lock to avoid touching a cacheline in sending path
We (Paolo and I) noticed that touching an extra cacheline for cq_cached_prod_lock in the sending path hurts performance. After moving the lock from struct xsk_buff_pool to struct xsk_queue, performance increases by ~5%, as observed with xdpsock.

An alternative approach [1] would be to use atomic_try_cmpxchg() to the same effect. Unfortunately, I don't have solid performance numbers proving the atomic approach is better than the current patch. Its advantage is reducing contention time among multiple xsks sharing the same pool, while its disadvantage is harder maintenance. The full discussion can be found at the following link.

[1]: https://lore.kernel.org/all/20251128134601.54678-1-kerneljasonxing@gmail.com/

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Link: https://patch.msgid.link/20260104012125.44003-3-kerneljasonxing@gmail.com
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
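To illustrate the cacheline argument above: after the move, the lock lives in struct xsk_queue next to the cached producer index that xskq_prod_reserve() bumps under it, so the locked reserve no longer drags in a separate line from struct xsk_buff_pool. The following is a minimal userspace sketch (not kernel code) that mocks such a layout; the mock structs, surrounding fields, and the 64-byte line size are assumptions for illustration only, not the real kernel layout.

/* Userspace mock only: shows the lock co-located with the producer index
 * it serializes.  Field set and sizes are assumed, not the kernel's. */
#include <stdio.h>
#include <stddef.h>

struct mock_spinlock { unsigned int raw; };	/* stand-in for spinlock_t */

struct mock_xsk_queue {
	unsigned int ring_mask;
	unsigned int nentries;
	unsigned int cached_prod;			/* bumped under the lock */
	unsigned int cached_cons;
	struct mock_spinlock cq_cached_prod_lock;	/* after the patch: same struct */
};

int main(void)
{
	size_t prod = offsetof(struct mock_xsk_queue, cached_prod);
	size_t lock = offsetof(struct mock_xsk_queue, cq_cached_prod_lock);

	/* With this assumed layout, the spin_lock() + reserve pair touches a
	 * single 64-byte line instead of one line in xsk_buff_pool for the
	 * lock plus another in xsk_queue for cached_prod. */
	printf("cached_prod @%zu, lock @%zu, same 64B line: %s\n",
	       prod, lock, (prod / 64 == lock / 64) ? "yes" : "no");
	return 0;
}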
-rw-r--r--	include/net/xsk_buff_pool.h	5
-rw-r--r--	net/xdp/xsk.c			8
-rw-r--r--	net/xdp/xsk_buff_pool.c		2
-rw-r--r--	net/xdp/xsk_queue.h		5
4 files changed, 10 insertions, 10 deletions
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 92a2358c6ce3..0b1abdb99c9e 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -90,11 +90,6 @@ struct xsk_buff_pool {
 	 * destructor callback.
 	 */
 	spinlock_t cq_prod_lock;
-	/* Mutual exclusion of the completion ring in the SKB mode.
-	 * Protect: when sockets share a single cq when the same netdev
-	 * and queue id is shared.
-	 */
-	spinlock_t cq_cached_prod_lock;
 	struct xdp_buff_xsk *free_heads[];
 };
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3c52fafae47c..3b46bc635c43 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -543,9 +543,9 @@ static int xsk_cq_reserve_locked(struct xsk_buff_pool *pool)
 {
 	int ret;
 
-	spin_lock(&pool->cq_cached_prod_lock);
+	spin_lock(&pool->cq->cq_cached_prod_lock);
 	ret = xskq_prod_reserve(pool->cq);
-	spin_unlock(&pool->cq_cached_prod_lock);
+	spin_unlock(&pool->cq->cq_cached_prod_lock);
 
 	return ret;
 }
@@ -619,9 +619,9 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
 
 static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
 {
-	spin_lock(&pool->cq_cached_prod_lock);
+	spin_lock(&pool->cq->cq_cached_prod_lock);
 	xskq_prod_cancel_n(pool->cq, n);
-	spin_unlock(&pool->cq_cached_prod_lock);
+	spin_unlock(&pool->cq->cq_cached_prod_lock);
 }
 
 INDIRECT_CALLABLE_SCOPE
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 6bf84316e2ad..cd5125b6af53 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -91,7 +91,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
 	INIT_LIST_HEAD(&pool->xsk_tx_list);
 	spin_lock_init(&pool->xsk_tx_list_lock);
 	spin_lock_init(&pool->cq_prod_lock);
-	spin_lock_init(&pool->cq_cached_prod_lock);
+	spin_lock_init(&xs->cq_tmp->cq_cached_prod_lock);
 	refcount_set(&pool->users, 1);
 
 	pool->fq = xs->fq_tmp;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 1eb8d9f8b104..ec08d9c102b1 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -46,6 +46,11 @@ struct xsk_queue {
 	u64 invalid_descs;
 	u64 queue_empty_descs;
 	size_t ring_vmalloc_size;
+	/* Mutual exclusion of the completion ring in the SKB mode.
+	 * Protect: when sockets share a single cq when the same netdev
+	 * and queue id is shared.
+	 */
+	spinlock_t cq_cached_prod_lock;
 };
 
 struct parsed_desc {
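For completeness, the atomic_try_cmpxchg() alternative mentioned in the commit message could look roughly like the sketch below. This is only a hedged illustration of the idea, not the actual patch behind [1]: the cached_prod_atomic field does not exist in struct xsk_queue, and the fullness check is simplified compared to xskq_prod_reserve().

/* Hedged sketch of the lockless alternative: reserve a cq slot by advancing
 * an atomic producer index instead of taking cq_cached_prod_lock.  The
 * cached_prod_atomic field and the simplified fullness check are assumptions
 * for illustration only. */
static int xsk_cq_reserve_lockless(struct xsk_queue *q)
{
	int old = atomic_read(&q->cached_prod_atomic);

	do {
		/* Unsigned wrap-around arithmetic, as in the ring helpers. */
		u32 free_entries = q->nentries -
				   ((u32)old - READ_ONCE(q->cached_cons));

		if (!free_entries)
			return -ENOSPC;
		/* On cmpxchg failure, 'old' is refreshed with the current
		 * value and the free-entry check is redone. */
	} while (!atomic_try_cmpxchg(&q->cached_prod_atomic, &old, old + 1));

	return 0;
}

Compared with keeping the spinlock in the queue, such a scheme would avoid serializing sockets that share the same pool and cq, at the cost of a cmpxchg retry loop and trickier maintenance, which matches the trade-off described in the commit message.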