author     Jakub Kicinski <kuba@kernel.org>  2026-01-10 11:13:03 -0800
committer  Jakub Kicinski <kuba@kernel.org>  2026-01-10 11:13:03 -0800
commit     cac2c363c41c12ec9ea1caefdf90f99607a531aa (patch)
tree       81b1021c873444654896a7b850fe2e3cb5150f61
parent     7470a7a63dc162f07c26dbf960e41ee1e248d80e (diff)
parent     a0c159647e6627496a85e57ca81f8cd6c685564b (diff)
Merge branch 'virtio-net-fix-the-deadlock-when-disabling-rx-napi'
Bui Quang Minh says:

====================
virtio-net: fix the deadlock when disabling rx NAPI

Calling napi_disable() on an already disabled napi can cause a deadlock.
In commit 4bc12818b363 ("virtio-net: disable delayed refill when pausing
rx"), to avoid the deadlock, when pausing RX in virtnet_rx_pause[_all](),
we disable and cancel the delayed refill work. However, in
virtnet_rx_resume_all(), we enable the delayed refill work too early,
before all the receive queue napis have been re-enabled.

The deadlock can be reproduced by running
selftests/drivers/net/hw/xsk_reconfig.py with a multiqueue virtio-net
device and inserting a cond_resched() inside the for loop in
virtnet_rx_resume_all() to increase the success rate. Because the worker
processing the delayed refill work runs on the same CPU as
virtnet_rx_resume_all(), a reschedule is needed to cause the deadlock. In
a real scenario, contention on netdev_lock can cause that reschedule.

Due to the complexity of the delayed refill worker, this series removes
it. When we fail to refill the receive buffer, we retry in the next NAPI
poll instead.

- Patch 1: remove the delayed refill work scheduling and retry the refill
  in the next NAPI poll
- Patches 2, 3: remove and clean up the now-unused delayed refill worker
  code

For testing, I've run the following tests with no issue so far:

- selftests/drivers/net/hw/xsk_reconfig.py, which sets up XDP zerocopy
  without providing any descriptors to the fill ring. As a result,
  try_fill_recv will always fail.
- Sending TCP packets from host to guest while the guest is nearly OOM,
  so some try_fill_recv calls fail.

v2: https://lore.kernel.org/20260102152023.10773-1-minhquangbui99@gmail.com
v1: https://lore.kernel.org/20251223152533.24364-1-minhquangbui99@gmail.com

Link to the previous approach and discussion:
https://lore.kernel.org/20251212152741.11656-1-minhquangbui99@gmail.com
====================

Link: https://patch.msgid.link/20260106150438.7425-1-minhquangbui99@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
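For reference, a condensed C sketch of the pre-series ordering described in the cover letter, pieced together from the refill_work() and resume paths that the diff below removes. The helper names match the driver, but the bodies are trimmed and simplified; this is an illustration of the ordering issue, not the literal driver code.

/* Simplified sketch of the old flow (not the literal driver code). */
static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		/* Does not return until rq->napi is enabled again; per the
		 * cover letter, hitting a still-disabled napi here is the
		 * deadlock.
		 */
		napi_disable(&rq->napi);
		try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_do_enable(rq->vq, &rq->napi);
	}
}

static void virtnet_rx_resume_all(struct virtnet_info *vi)
{
	int i;

	/* Refill work may be scheduled from this point on... */
	enable_delayed_refill(vi);

	/* ...but a reschedule inside this loop lets refill_work run while
	 * some napis are still disabled.
	 */
	for (i = 0; i < vi->max_queue_pairs; i++)
		__virtnet_rx_resume(vi, &vi->rq[i], i < vi->curr_queue_pairs);
}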
-rw-r--r--  drivers/net/virtio_net.c  163
1 file changed, 34 insertions(+), 129 deletions(-)
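The replacement strategy is visible in the virtnet_receive() hunk below: when try_fill_recv() fails, the poll path reports that it consumed the whole budget, so the NAPI core keeps the queue scheduled and the refill is retried on the next poll. A minimal sketch of that pattern follows; virtnet_receive_packets() is the driver's own receive loop but its signature is simplified here, and the surrounding bookkeeping is omitted.

/* Sketch of the retry-in-NAPI pattern, relying on the standard NAPI
 * contract: returning 'budget' from the poll callback keeps the queue
 * scheduled for another poll.
 */
static int rx_poll_sketch(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq = container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	int work_done;

	/* Receive up to 'budget' packets (driver helper, signature simplified). */
	work_done = virtnet_receive_packets(vi, rq, budget);

	/* If a refill attempt fails (e.g. OOM, no XSK fill descriptors),
	 * claim the full budget so this poll function runs again soon,
	 * instead of deferring the refill to a delayed workqueue.
	 */
	if (!try_fill_recv(vi, rq, GFP_ATOMIC))
		work_done = budget;

	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}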
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 22d894101c01..ca92b4a1879c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -441,9 +441,6 @@ struct virtnet_info {
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for delayed refilling if we run low on memory. */
- struct delayed_work refill;
-
/* UDP tunnel support */
bool tx_tnl;
@@ -451,12 +448,6 @@ struct virtnet_info {
bool rx_tnl_csum;
- /* Is delayed refill enabled? */
- bool refill_enabled;
-
- /* The lock to synchronize the access to refill_enabled */
- spinlock_t refill_lock;
-
/* Work struct for config space updates */
struct work_struct config_work;
@@ -720,20 +711,6 @@ static void virtnet_rq_free_buf(struct virtnet_info *vi,
put_page(virt_to_head_page(buf));
}
-static void enable_delayed_refill(struct virtnet_info *vi)
-{
- spin_lock_bh(&vi->refill_lock);
- vi->refill_enabled = true;
- spin_unlock_bh(&vi->refill_lock);
-}
-
-static void disable_delayed_refill(struct virtnet_info *vi)
-{
- spin_lock_bh(&vi->refill_lock);
- vi->refill_enabled = false;
- spin_unlock_bh(&vi->refill_lock);
-}
-
static void enable_rx_mode_work(struct virtnet_info *vi)
{
rtnl_lock();
@@ -2948,42 +2925,6 @@ static void virtnet_napi_disable(struct receive_queue *rq)
napi_disable(napi);
}
-static void refill_work(struct work_struct *work)
-{
- struct virtnet_info *vi =
- container_of(work, struct virtnet_info, refill.work);
- bool still_empty;
- int i;
-
- for (i = 0; i < vi->curr_queue_pairs; i++) {
- struct receive_queue *rq = &vi->rq[i];
-
- /*
- * When queue API support is added in the future and the call
- * below becomes napi_disable_locked, this driver will need to
- * be refactored.
- *
- * One possible solution would be to:
- * - cancel refill_work with cancel_delayed_work (note:
- * non-sync)
- * - cancel refill_work with cancel_delayed_work_sync in
- * virtnet_remove after the netdev is unregistered
- * - wrap all of the work in a lock (perhaps the netdev
- * instance lock)
- * - check netif_running() and return early to avoid a race
- */
- napi_disable(&rq->napi);
- still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
- virtnet_napi_do_enable(rq->vq, &rq->napi);
-
- /* In theory, this can happen: if we don't get any buffers in
- * we will *never* try to fill again.
- */
- if (still_empty)
- schedule_delayed_work(&vi->refill, HZ/2);
- }
-}
-
static int virtnet_receive_xsk_bufs(struct virtnet_info *vi,
struct receive_queue *rq,
int budget,
@@ -3046,16 +2987,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
else
packets = virtnet_receive_packets(vi, rq, budget, xdp_xmit, &stats);
+ u64_stats_set(&stats.packets, packets);
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
- spin_lock(&vi->refill_lock);
- if (vi->refill_enabled)
- schedule_delayed_work(&vi->refill, 0);
- spin_unlock(&vi->refill_lock);
- }
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+ /* We need to retry refilling in the next NAPI poll so
+ * we must return budget to make sure the NAPI is
+ * repolled.
+ */
+ packets = budget;
}
- u64_stats_set(&stats.packets, packets);
u64_stats_update_begin(&rq->stats.syncp);
for (i = 0; i < ARRAY_SIZE(virtnet_rq_stats_desc); i++) {
size_t offset = virtnet_rq_stats_desc[i].offset;
@@ -3226,13 +3167,12 @@ static int virtnet_open(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
- enable_delayed_refill(vi);
-
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
- /* Make sure we have some buffers: if oom use wq. */
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
+ /* Pre-fill rq aggressively, to make sure we are ready to
+ * get packets immediately.
+ */
+ try_fill_recv(vi, &vi->rq[i], GFP_KERNEL);
err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
@@ -3251,9 +3191,6 @@ static int virtnet_open(struct net_device *dev)
return 0;
err_enable_qp:
- disable_delayed_refill(vi);
- cancel_delayed_work_sync(&vi->refill);
-
for (i--; i >= 0; i--) {
virtnet_disable_queue_pair(vi, i);
virtnet_cancel_dim(vi, &vi->rq[i].dim);
@@ -3432,8 +3369,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static void __virtnet_rx_pause(struct virtnet_info *vi,
- struct receive_queue *rq)
+static void virtnet_rx_pause(struct virtnet_info *vi,
+ struct receive_queue *rq)
{
bool running = netif_running(vi->dev);
@@ -3447,62 +3384,37 @@ static void virtnet_rx_pause_all(struct virtnet_info *vi)
{
int i;
- /*
- * Make sure refill_work does not run concurrently to
- * avoid napi_disable race which leads to deadlock.
- */
- disable_delayed_refill(vi);
- cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++)
- __virtnet_rx_pause(vi, &vi->rq[i]);
+ virtnet_rx_pause(vi, &vi->rq[i]);
}
-static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
+static void virtnet_rx_resume(struct virtnet_info *vi,
+ struct receive_queue *rq,
+ bool refill)
{
- /*
- * Make sure refill_work does not run concurrently to
- * avoid napi_disable race which leads to deadlock.
- */
- disable_delayed_refill(vi);
- cancel_delayed_work_sync(&vi->refill);
- __virtnet_rx_pause(vi, rq);
-}
-
-static void __virtnet_rx_resume(struct virtnet_info *vi,
- struct receive_queue *rq,
- bool refill)
-{
- bool running = netif_running(vi->dev);
- bool schedule_refill = false;
+ if (netif_running(vi->dev)) {
+ /* Pre-fill rq aggressively, to make sure we are ready to get
+ * packets immediately.
+ */
+ if (refill)
+ try_fill_recv(vi, rq, GFP_KERNEL);
- if (refill && !try_fill_recv(vi, rq, GFP_KERNEL))
- schedule_refill = true;
- if (running)
virtnet_napi_enable(rq);
-
- if (schedule_refill)
- schedule_delayed_work(&vi->refill, 0);
+ }
}
static void virtnet_rx_resume_all(struct virtnet_info *vi)
{
int i;
- enable_delayed_refill(vi);
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
- __virtnet_rx_resume(vi, &vi->rq[i], true);
+ virtnet_rx_resume(vi, &vi->rq[i], true);
else
- __virtnet_rx_resume(vi, &vi->rq[i], false);
+ virtnet_rx_resume(vi, &vi->rq[i], false);
}
}
-static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
-{
- enable_delayed_refill(vi);
- __virtnet_rx_resume(vi, rq, true);
-}
-
static int virtnet_rx_resize(struct virtnet_info *vi,
struct receive_queue *rq, u32 ring_num)
{
@@ -3516,7 +3428,7 @@ static int virtnet_rx_resize(struct virtnet_info *vi,
if (err)
netdev_err(vi->dev, "resize rx fail: rx queue index: %d err: %d\n", qindex, err);
- virtnet_rx_resume(vi, rq);
+ virtnet_rx_resume(vi, rq, true);
return err;
}
@@ -3829,11 +3741,12 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
}
succ:
vi->curr_queue_pairs = queue_pairs;
- /* virtnet_open() will refill when device is going to up. */
- spin_lock_bh(&vi->refill_lock);
- if (dev->flags & IFF_UP && vi->refill_enabled)
- schedule_delayed_work(&vi->refill, 0);
- spin_unlock_bh(&vi->refill_lock);
+ if (dev->flags & IFF_UP) {
+ local_bh_disable();
+ for (int i = 0; i < vi->curr_queue_pairs; ++i)
+ virtqueue_napi_schedule(&vi->rq[i].napi, vi->rq[i].vq);
+ local_bh_enable();
+ }
return 0;
}
@@ -3843,10 +3756,6 @@ static int virtnet_close(struct net_device *dev)
struct virtnet_info *vi = netdev_priv(dev);
int i;
- /* Make sure NAPI doesn't schedule refill work */
- disable_delayed_refill(vi);
- /* Make sure refill_work doesn't re-enable napi! */
- cancel_delayed_work_sync(&vi->refill);
/* Prevent the config change callback from changing carrier
* after close
*/
@@ -5802,7 +5711,6 @@ static int virtnet_restore_up(struct virtio_device *vdev)
virtio_device_ready(vdev);
- enable_delayed_refill(vi);
enable_rx_mode_work(vi);
if (netif_running(vi->dev)) {
@@ -5892,7 +5800,7 @@ static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queu
rq->xsk_pool = pool;
- virtnet_rx_resume(vi, rq);
+ virtnet_rx_resume(vi, rq, true);
if (pool)
return 0;
@@ -6559,7 +6467,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
if (!vi->rq)
goto err_rq;
- INIT_DELAYED_WORK(&vi->refill, refill_work);
for (i = 0; i < vi->max_queue_pairs; i++) {
vi->rq[i].pages = NULL;
netif_napi_add_config(vi->dev, &vi->rq[i].napi, virtnet_poll,
@@ -6901,7 +6808,6 @@ static int virtnet_probe(struct virtio_device *vdev)
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
INIT_WORK(&vi->rx_mode_work, virtnet_rx_mode_work);
- spin_lock_init(&vi->refill_lock);
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) {
vi->mergeable_rx_bufs = true;
@@ -7165,7 +7071,6 @@ free_failover:
net_failover_destroy(vi->failover);
free_vqs:
virtio_reset_device(vdev);
- cancel_delayed_work_sync(&vi->refill);
free_receive_page_frags(vi);
virtnet_del_vqs(vi);
free: