From 45e359be1ce88fb22e61fa3aa23b2e450a6cae03 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Sat, 5 Jul 2025 00:01:38 +0800 Subject: net: xsk: introduce XDP_MAX_TX_SKB_BUDGET setsockopt This patch provides a setsockopt method to let applications leverage to adjust how many descs to be handled at most in one send syscall. It mitigates the situation where the default value (32) that is too small leads to higher frequency of triggering send syscall. Considering the prosperity/complexity the applications have, there is no absolutely ideal suggestion fitting all cases. So keep 32 as its default value like before. The patch does the following things: - Add XDP_MAX_TX_SKB_BUDGET socket option. - Set max_tx_budget to 32 by default in the initialization phase as a per-socket granular control. - Set the range of max_tx_budget as [32, xs->tx->nentries]. The idea behind this comes out of real workloads in production. We use a user-level stack with xsk support to accelerate sending packets and minimize triggering syscalls. When the packets are aggregated, it's not hard to hit the upper bound (namely, 32). The moment user-space stack fetches the -EAGAIN error number passed from sendto(), it will loop to try again until all the expected descs from tx ring are sent out to the driver. Enlarging the XDP_MAX_TX_SKB_BUDGET value contributes to less frequency of sendto() and higher throughput/PPS. Here is what I did in production, along with some numbers as follows: For one application I saw lately, I suggested using 128 as max_tx_budget because I saw two limitations without changing any default configuration: 1) XDP_MAX_TX_SKB_BUDGET, 2) socket sndbuf which is 212992 decided by net.core.wmem_default. As to XDP_MAX_TX_SKB_BUDGET, the scenario behind this was I counted how many descs are transmitted to the driver at one time of sendto() based on [1] patch and then I calculated the possibility of hitting the upper bound. Finally I chose 128 as a suitable value because 1) it covers most of the cases, 2) a higher number would not bring evident results. After twisting the parameters, a stable improvement of around 4% for both PPS and throughput and less resources consumption were found to be observed by strace -c -p xxx: 1) %time was decreased by 7.8% 2) error counter was decreased from 18367 to 572 [1]: https://lore.kernel.org/all/20250619093641.70700-1-kerneljasonxing@gmail.com/ Signed-off-by: Jason Xing Acked-by: Maciej Fijalkowski Link: https://patch.msgid.link/20250704160138.48677-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni --- tools/include/uapi/linux/if_xdp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include/uapi') diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 44f2bb93e7e6..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ -- cgit v1.2.3 From 2677010e7793451c20d895c477c4dc76f6e6a10e Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Thu, 10 Jul 2025 21:12:03 +0000 Subject: Add support to set NAPI threaded for individual NAPI A net device has a threaded sysctl that can be used to enable threaded NAPI polling on all of the NAPI contexts under that device. Allow enabling threaded NAPI polling at individual NAPI level using netlink. Extend the netlink operation `napi-set` and allow setting the threaded attribute of a NAPI. This will enable the threaded polling on a NAPI context. Add a test in `nl_netdev.py` that verifies various cases of threaded NAPI being set at NAPI and at device level. Tested ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250710211203.3979655-1-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 10 ++++ Documentation/networking/napi.rst | 9 +++- include/linux/netdevice.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/dev.c | 30 +++++++++-- net/core/dev.h | 7 +++ net/core/netdev-genl-gen.c | 5 +- net/core/netdev-genl.c | 14 +++++ tools/include/uapi/linux/netdev.h | 1 + tools/testing/selftests/net/nl_netdev.py | 91 +++++++++++++++++++++++++++++++- 10 files changed, 162 insertions(+), 7 deletions(-) (limited to 'tools/include/uapi') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index ce4cfec82100..85d0ea6ac426 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -283,6 +283,14 @@ attribute-sets: doc: The timeout, in nanoseconds, of how long to suspend irq processing, if event polling finds events type: uint + - + name: threaded + doc: Whether the NAPI is configured to operate in threaded polling + mode. If this is set to 1 then the NAPI context operates in + threaded polling mode. + type: uint + checks: + max: 1 - name: xsk-info attributes: [] @@ -694,6 +702,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded dump: request: attributes: @@ -746,6 +755,7 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - threaded - name: bind-tx doc: Bind dmabuf to netdev for TX diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst index d0e3953cae6a..a15754adb041 100644 --- a/Documentation/networking/napi.rst +++ b/Documentation/networking/napi.rst @@ -444,7 +444,14 @@ dependent). The NAPI instance IDs will be assigned in the opposite order than the process IDs of the kernel threads. Threaded NAPI is controlled by writing 0/1 to the ``threaded`` file in -netdev's sysfs directory. +netdev's sysfs directory. It can also be enabled for a specific NAPI using +netlink interface. + +For example, using the script: + +.. code-block:: bash + + $ ynl --family netdev --do napi-set --json='{"id": 66, "threaded": 1}' .. rubric:: Footnotes diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec23cee5245d..e49d8c98d284 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,6 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; + bool threaded; unsigned int napi_id; }; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/net/core/dev.c b/net/core/dev.c index 19ddc3e6990a..621a639aeba1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6961,6 +6961,31 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } +int napi_set_threaded(struct napi_struct *napi, bool threaded) +{ + if (threaded) { + if (!napi->thread) { + int err = napi_kthread_create(napi); + + if (err) + return err; + } + } + + if (napi->config) + napi->config->threaded = threaded; + + if (!threaded && napi->thread) { + napi_stop_kthread(napi); + } else { + /* Make sure kthread is created before THREADED bit is set. */ + smp_mb__before_atomic(); + assign_bit(NAPI_STATE_THREADED, &napi->state, threaded); + } + + return 0; +} + int dev_set_threaded(struct net_device *dev, bool threaded) { struct napi_struct *napi; @@ -6968,9 +6993,6 @@ int dev_set_threaded(struct net_device *dev, bool threaded) netdev_assert_locked_or_invisible(dev); - if (dev->threaded == threaded) - return 0; - if (threaded) { list_for_each_entry(napi, &dev->napi_list, dev_list) { if (!napi->thread) { @@ -7221,6 +7243,8 @@ static void napi_restore_config(struct napi_struct *n) napi_hash_add(n); n->config->napi_id = n->napi_id; } + + WARN_ON_ONCE(napi_set_threaded(n, n->config->threaded)); } static void napi_save_config(struct napi_struct *n) diff --git a/net/core/dev.h b/net/core/dev.h index e93f36b7ddf3..a603387fb566 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,6 +315,13 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } +static inline bool napi_get_threaded(struct napi_struct *n) +{ + return test_bit(NAPI_STATE_THREADED, &n->state); +} + +int napi_set_threaded(struct napi_struct *n, bool threaded); + int rps_cpumask_housekeeping(struct cpumask *mask); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 4fc44587f493..0994bd68a7e6 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -92,11 +92,12 @@ static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] }; /* NETDEV_CMD_NAPI_SET - do */ -static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT + 1] = { +static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED + 1] = { [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), }; /* NETDEV_CMD_BIND_TX - do */ @@ -193,7 +194,7 @@ static const struct genl_split_ops netdev_nl_ops[] = { .cmd = NETDEV_CMD_NAPI_SET, .doit = netdev_nl_napi_set_doit, .policy = netdev_napi_set_nl_policy, - .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + .maxattr = NETDEV_A_NAPI_THREADED, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 2afa7b2141aa..5875df372415 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -184,6 +184,10 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) goto nla_put_failure; + if (nla_put_uint(rsp, NETDEV_A_NAPI_THREADED, + napi_get_threaded(napi))) + goto nla_put_failure; + if (napi->thread) { pid = task_pid_nr(napi->thread); if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) @@ -322,8 +326,18 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) { u64 irq_suspend_timeout = 0; u64 gro_flush_timeout = 0; + u8 threaded = 0; u32 defer = 0; + if (info->attrs[NETDEV_A_NAPI_THREADED]) { + int ret; + + threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); + ret = napi_set_threaded(napi, !!threaded); + if (ret) + return ret; + } + if (info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]) { defer = nla_get_u32(info->attrs[NETDEV_A_NAPI_DEFER_HARD_IRQS]); napi_set_defer_hard_irqs(napi, defer); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7eb9571786b8..1f3719a9a0eb 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -134,6 +134,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c9109627a741..c8ffade79a52 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -35,6 +35,91 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': 0}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1.get('pid'), None) + def dev_set_threaded(nf) -> None: """ Test that verifies various cases of napi threaded @@ -56,8 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 1) ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 1) ksft_ne(napi1.get('pid'), None) # unset threaded @@ -65,8 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], 0) ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], 0) ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: @@ -156,7 +245,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - dev_set_threaded, nsim_rxq_reset_down], + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() -- cgit v1.2.3 From 8e7583a4f65f3dbf3e8deb4e60f3679c276bef62 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 23 Jul 2025 01:30:31 +0000 Subject: net: define an enum for the napi threaded state Instead of using '0' and '1' for napi threaded state use an enum with 'disabled' and 'enabled' states. Tested: ./tools/testing/selftests/net/nl_netdev.py TAP version 13 1..7 ok 1 nl_netdev.empty_check ok 2 nl_netdev.lo_check ok 3 nl_netdev.page_pool_check ok 4 nl_netdev.napi_list_check ok 5 nl_netdev.dev_set_threaded ok 6 nl_netdev.napi_set_threaded ok 7 nl_netdev.nsim_rxq_reset_down # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Samiullah Khawaja Link: https://patch.msgid.link/20250723013031.2911384-4-skhawaja@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 13 +++++--- .../networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 10 +++--- include/uapi/linux/netdev.h | 5 +++ net/core/dev.c | 12 +++++--- net/core/dev.h | 13 +++++--- net/core/dev_api.c | 3 +- net/core/netdev-genl-gen.c | 2 +- net/core/netdev-genl.c | 2 +- tools/include/uapi/linux/netdev.h | 5 +++ tools/testing/selftests/net/nl_netdev.py | 36 +++++++++++----------- 11 files changed, 62 insertions(+), 41 deletions(-) (limited to 'tools/include/uapi') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 85d0ea6ac426..c035dc0f64fd 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -85,6 +85,10 @@ definitions: name: qstats-scope type: flags entries: [queue] + - + name: napi-threaded + type: enum + entries: [disabled, enabled] attribute-sets: - @@ -286,11 +290,10 @@ attribute-sets: - name: threaded doc: Whether the NAPI is configured to operate in threaded polling - mode. If this is set to 1 then the NAPI context operates in - threaded polling mode. - type: uint - checks: - max: 1 + mode. If this is set to enabled then the NAPI context operates + in threaded polling mode. + type: u32 + enum: napi-threaded - name: xsk-info attributes: [] diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 2d3dc4692d20..1c19bb7705df 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -68,6 +68,7 @@ unsigned_char addr_assign_type unsigned_char addr_len unsigned_char upper_level unsigned_char lower_level +u8 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_short neigh_priv_len unsigned_short padded unsigned_short dev_id @@ -165,7 +166,6 @@ struct sfp_bus* sfp_bus struct lock_class_key* qdisc_tx_busylock bool proto_down unsigned:1 wol_enabled -unsigned:1 threaded napi_poll(napi_enable,netif_set_threaded) unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down unsigned_long:1 netns_immutable diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a97c9a337d6b..5e5de4b0a433 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -369,7 +369,7 @@ struct napi_config { u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; - bool threaded; + u8 threaded; unsigned int napi_id; }; @@ -590,7 +590,8 @@ static inline bool napi_complete(struct napi_struct *n) } void netif_threaded_enable(struct net_device *dev); -int dev_set_threaded(struct net_device *dev, bool threaded); +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); void napi_disable(struct napi_struct *n); void napi_disable_locked(struct napi_struct *n); @@ -1872,6 +1873,7 @@ enum netdev_reg_state { * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. + * @threaded: napi threaded state. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address @@ -2011,8 +2013,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @threaded: napi threaded mode is enabled - * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by @@ -2248,6 +2248,7 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + u8 threaded; unsigned short neigh_priv_len; unsigned short dev_id; @@ -2429,7 +2430,6 @@ struct net_device { struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; - bool threaded; bool irq_affinity_auto; bool rx_cpu_rmap_auto; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/net/core/dev.c b/net/core/dev.c index f28661d6f5ea..1c6e755841ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6963,7 +6963,8 @@ static void napi_stop_kthread(struct napi_struct *napi) napi->thread = NULL; } -int napi_set_threaded(struct napi_struct *napi, bool threaded) +int napi_set_threaded(struct napi_struct *napi, + enum netdev_napi_threaded threaded) { if (threaded) { if (!napi->thread) { @@ -6988,7 +6989,8 @@ int napi_set_threaded(struct napi_struct *napi, bool threaded) return 0; } -int netif_set_threaded(struct net_device *dev, bool threaded) +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { struct napi_struct *napi; int err = 0; @@ -7000,7 +7002,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) if (!napi->thread) { err = napi_kthread_create(napi); if (err) { - threaded = false; + threaded = NETDEV_NAPI_THREADED_DISABLED; break; } } @@ -7043,7 +7045,7 @@ int netif_set_threaded(struct net_device *dev, bool threaded) */ void netif_threaded_enable(struct net_device *dev) { - WARN_ON_ONCE(netif_set_threaded(dev, true)); + WARN_ON_ONCE(netif_set_threaded(dev, NETDEV_NAPI_THREADED_ENABLED)); } EXPORT_SYMBOL(netif_threaded_enable); @@ -7360,7 +7362,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, * threaded mode will not be enabled in napi_enable(). */ if (dev->threaded && napi_kthread_create(napi)) - dev->threaded = false; + dev->threaded = NETDEV_NAPI_THREADED_DISABLED; netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); diff --git a/net/core/dev.h b/net/core/dev.h index f5b567310908..ab69edc0c3e3 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -315,14 +315,19 @@ static inline void napi_set_irq_suspend_timeout(struct napi_struct *n, WRITE_ONCE(n->irq_suspend_timeout, timeout); } -static inline bool napi_get_threaded(struct napi_struct *n) +static inline enum netdev_napi_threaded napi_get_threaded(struct napi_struct *n) { - return test_bit(NAPI_STATE_THREADED, &n->state); + if (test_bit(NAPI_STATE_THREADED, &n->state)) + return NETDEV_NAPI_THREADED_ENABLED; + + return NETDEV_NAPI_THREADED_DISABLED; } -int napi_set_threaded(struct napi_struct *n, bool threaded); +int napi_set_threaded(struct napi_struct *n, + enum netdev_napi_threaded threaded); -int netif_set_threaded(struct net_device *dev, bool threaded); +int netif_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded); int rps_cpumask_housekeeping(struct cpumask *mask); diff --git a/net/core/dev_api.c b/net/core/dev_api.c index dd7f57013ce5..f28852078aa6 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -368,7 +368,8 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); -int dev_set_threaded(struct net_device *dev, bool threaded) +int dev_set_threaded(struct net_device *dev, + enum netdev_napi_threaded threaded) { int ret; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 0994bd68a7e6..e9a2a6f26cb7 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -97,7 +97,7 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_THREADED [NETDEV_A_NAPI_DEFER_HARD_IRQS] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_napi_defer_hard_irqs_range), [NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT] = { .type = NLA_UINT, }, [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, - [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_UINT, 1), + [NETDEV_A_NAPI_THREADED] = NLA_POLICY_MAX(NLA_U32, 1), }; /* NETDEV_CMD_BIND_TX - do */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 5875df372415..6314eb7bdf69 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -333,7 +333,7 @@ netdev_nl_napi_set_config(struct napi_struct *napi, struct genl_info *info) int ret; threaded = nla_get_uint(info->attrs[NETDEV_A_NAPI_THREADED]); - ret = napi_set_threaded(napi, !!threaded); + ret = napi_set_threaded(napi, threaded); if (ret) return ret; } diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 1f3719a9a0eb..48eb49aa03d4 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index c8ffade79a52..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -52,14 +52,14 @@ def napi_set_threaded(nf) -> None: napi1_id = napis[1]['id'] # set napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 1}) + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) ip(f"link set dev {nsim.ifname} down") @@ -67,18 +67,18 @@ def napi_set_threaded(nf) -> None: # verify if napi threaded is still set napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # check it is still not set for napi1 napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # unset napi threaded and verify - nf.napi_set({'id': napi0_id, 'threaded': 0}) + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) # set threaded at device level @@ -86,10 +86,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded at device level @@ -97,16 +97,16 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) # set napi threaded for napi0 nf.napi_set({'id': napi0_id, 'threaded': 1}) napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) # unset threaded at device level @@ -114,10 +114,10 @@ def napi_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def dev_set_threaded(nf) -> None: @@ -141,10 +141,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is set for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 1) + ksft_eq(napi0['threaded'], "enabled") ksft_ne(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 1) + ksft_eq(napi1['threaded'], "enabled") ksft_ne(napi1.get('pid'), None) # unset threaded @@ -152,10 +152,10 @@ def dev_set_threaded(nf) -> None: # check napi threaded is unset for both napis napi0 = nf.napi_get({'id': napi0_id}) - ksft_eq(napi0['threaded'], 0) + ksft_eq(napi0['threaded'], "disabled") ksft_eq(napi0.get('pid'), None) napi1 = nf.napi_get({'id': napi1_id}) - ksft_eq(napi1['threaded'], 0) + ksft_eq(napi1['threaded'], "disabled") ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: -- cgit v1.2.3