diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-01-20 18:10:04 -0800 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-01-20 18:10:04 -0800 |
| commit | 677a51790be9fe1c843885e6d0c613a50f1de1c0 (patch) | |
| tree | 4db0ec5119d55672689b0d7762e8cf90994e425f /include | |
| parent | 8766d61a1d33cb5f15bfdd6ce9832bbe1fc649c2 (diff) | |
| parent | d1de61db1536727c1cad049c09decff22e8b6dd7 (diff) | |
Merge tag 'net-queue-rx-buf-len-v9' of https://github.com/isilence/linux
Pavel Begunkov says:
====================
Add support for providers with large rx buffer
Many modern NICs support configurable receive buffer lengths, and zcrx and
memory providers can use buffers larger than 4K to improve performance.
When paired with hw-gro larger rx buffer sizes can drastically reduce
the number of buffers traversing the stack and save a lot of processing
time. It also allows to give to users larger contiguous chunks of data.
Single stream benchmarks showed up to ~30% CPU util improvement.
E.g. comparison for 4K vs 32K buffers using a 200Gbit NIC:
packets=23987040 (MB=2745098), rps=199559 (MB/s=22837)
CPU %usr %nice %sys %iowait %irq %soft %idle
0 1.53 0.00 27.78 2.72 1.31 66.45 0.22
packets=24078368 (MB=2755550), rps=200319 (MB/s=22924)
CPU %usr %nice %sys %iowait %irq %soft %idle
0 0.69 0.00 8.26 31.65 1.83 57.00 0.57
This series adds net infrastructure for memory providers configuring
the size and implements it for bnxt. It's an opt-in feature for drivers,
they should advertise support for the parameter in the qops and must check
if the hardware supports the given size. It's limited to memory providers
as it drastically simplifies implementation. It doesn't affect the fast
path zcrx uAPI, and the user exposed parameter is defined in zcrx terms,
which allows it to be flexible and adjusted in the future.
A liburing example can be found at [2]
full branch:
[1] https://github.com/isilence/linux.git zcrx/large-buffers-v8
Liburing example:
[2] https://github.com/isilence/liburing.git zcrx/rx-buf-len
* tag 'net-queue-rx-buf-len-v9' of https://github.com/isilence/linux:
io_uring/zcrx: document area chunking parameter
selftests: iou-zcrx: test large chunk sizes
eth: bnxt: support qcfg provided rx page size
eth: bnxt: adjust the fill level of agg queues with larger buffers
eth: bnxt: store rx buffer size per queue
net: pass queue rx page size from memory provider
net: add bare bone queue configs
net: reduce indent of struct netdev_queue_mgmt_ops members
net: memzero mp params when closing a queue
====================
Link: https://patch.msgid.link/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/netdev_queues.h | 47 | ||||
| -rw-r--r-- | include/net/netdev_rx_queue.h | 2 | ||||
| -rw-r--r-- | include/net/page_pool/types.h | 1 |
3 files changed, 36 insertions, 14 deletions
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index b55d3b9cb9c2..2ab3eae8e8c3 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -14,6 +14,10 @@ struct netdev_config { u8 hds_config; }; +struct netdev_queue_config { + u32 rx_page_size; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; @@ -111,6 +115,11 @@ void netdev_stat_queue_sum(struct net_device *netdev, int tx_start, int tx_end, struct netdev_queue_stats_tx *tx_sum); +enum { + /* The queue checks and honours the page size qcfg parameter */ + QCFG_RX_PAGE_SIZE = 0x1, +}; + /** * struct netdev_queue_mgmt_ops - netdev ops for queue management * @@ -130,25 +139,35 @@ void netdev_stat_queue_sum(struct net_device *netdev, * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used * for this queue. Return NULL on error. * + * @ndo_default_qcfg: Populate queue config struct with defaults. Optional. + * + * @supported_params: Bitmask of supported parameters, see QCFG_*. + * * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only * be called for an interface which is open. */ struct netdev_queue_mgmt_ops { - size_t ndo_queue_mem_size; - int (*ndo_queue_mem_alloc)(struct net_device *dev, - void *per_queue_mem, - int idx); - void (*ndo_queue_mem_free)(struct net_device *dev, - void *per_queue_mem); - int (*ndo_queue_start)(struct net_device *dev, - void *per_queue_mem, - int idx); - int (*ndo_queue_stop)(struct net_device *dev, - void *per_queue_mem, - int idx); - struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, - int idx); + size_t ndo_queue_mem_size; + int (*ndo_queue_mem_alloc)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + void (*ndo_queue_mem_free)(struct net_device *dev, + void *per_queue_mem); + int (*ndo_queue_start)(struct net_device *dev, + struct netdev_queue_config *qcfg, + void *per_queue_mem, + int idx); + int (*ndo_queue_stop)(struct net_device *dev, + void *per_queue_mem, + int idx); + void (*ndo_default_qcfg)(struct net_device *dev, + struct netdev_queue_config *qcfg); + struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, + int idx); + + unsigned int supported_params; }; bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 8cdcd138b33f..cfa72c485387 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include <net/xdp.h> #include <net/page_pool/types.h> +#include <net/netdev_queues.h> /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { @@ -27,6 +28,7 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif struct napi_struct *napi; + struct netdev_queue_config qcfg; struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1509a536cb85..0d453484a585 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -161,6 +161,7 @@ struct memory_provider_ops; struct pp_memory_provider_params { void *mp_priv; const struct memory_provider_ops *mp_ops; + u32 rx_page_size; }; struct page_pool { |
