From 49675f5bdf9ae2624b430dafda4cb29024521625 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 1 Aug 2024 09:34:01 -0700 Subject: net: remove IFF_* re-definition We re-define values of enum netdev_priv_flags as preprocessor macros with the same name. I guess this was done to avoid breaking out of tree modules which may use #ifdef X for kernel compatibility? Commit 7aa98047df95 ("net: move net_device priv_flags out from UAPI") which added the enum doesn't say. In any case, the flags with defines are quite old now, and defines for new flags don't get added. OOT drivers have to resort to code greps for compat detection, anyway. Let's delete these defines, save LoC, help LXR link to the right place. Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20240801163401.378723-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607009150b5f..0ef3eaa23f4b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1689,38 +1689,6 @@ enum netdev_priv_flags { IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; -#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -#define IFF_EBRIDGE IFF_EBRIDGE -#define IFF_BONDING IFF_BONDING -#define IFF_ISATAP IFF_ISATAP -#define IFF_WAN_HDLC IFF_WAN_HDLC -#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE -#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE -#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL -#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT -#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT -#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH -#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING -#define IFF_UNICAST_FLT IFF_UNICAST_FLT -#define IFF_TEAM_PORT IFF_TEAM_PORT -#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS -#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE -#define IFF_MACVLAN IFF_MACVLAN -#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER -#define IFF_NO_QUEUE IFF_NO_QUEUE -#define IFF_OPENVSWITCH IFF_OPENVSWITCH -#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE -#define IFF_TEAM IFF_TEAM -#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED -#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM -#define IFF_MACSEC IFF_MACSEC -#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER -#define IFF_FAILOVER IFF_FAILOVER -#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE -#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR - /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, -- cgit v1.2.3 From 74b1e94e94ea369923e5656eeb10b26b16591327 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 Aug 2024 13:51:19 -0700 Subject: net: repack struct netdev_queue Adding the NAPI pointer to struct netdev_queue made it grow into another cacheline, even though there was 44 bytes of padding available. The struct was historically grouped as follows: /* read-mostly stuff (align) */ /* ... random control path fields ... */ /* write-mostly stuff (align) */ /* ... 40 byte hole ... */ /* struct dql (align) */ It seems that people want to add control path fields after the read only fields. struct dql looks pretty innocent but it forces its own alignment and nothing indicates that there is a lot of empty space above it. Move dql above the xmit_lock. This shifts the empty space to the end of the struct rather than in the middle of it. Move two example fields there to set an example. Hopefully people will now add new fields at the end of the struct. A lot of the read-only stuff is also control path-only, but if we move it all we'll have another hole in the middle. Before: /* size: 384, cachelines: 6, members: 16 */ /* sum members: 284, holes: 3, sum holes: 100 */ After: /* size: 320, cachelines: 5, members: 16 */ /* sum members: 284, holes: 1, sum holes: 8 */ Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240820205119.1321322-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0ef3eaa23f4b..614ec5d3d75b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,9 +644,6 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; -#endif -#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) - int numa_node; #endif unsigned long tx_maxrate; /* @@ -660,13 +657,13 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * Readers and writers must hold RTNL - */ - struct napi_struct *napi; + /* * write-mostly part */ +#ifdef CONFIG_BQL + struct dql dql; +#endif spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* @@ -676,8 +673,16 @@ struct netdev_queue { unsigned long state; -#ifdef CONFIG_BQL - struct dql dql; +/* + * slow- / control-path part + */ + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; + +#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) + int numa_node; #endif } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 3849687869092094003ba009dc00e2e0237a3b8a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 21 Aug 2024 17:09:55 +0200 Subject: net: phy: Introduce ethernet link topology representation Link topologies containing multiple network PHYs attached to the same net_device can be found when using a PHY as a media converter for use with an SFP connector, on which an SFP transceiver containing a PHY can be used. With the current model, the transceiver's PHY can't be used for operations such as cable testing, timestamping, macsec offload, etc. The reason being that most of the logic for these configuration, coming from either ethtool netlink or ioctls tend to use netdev->phydev, which in multi-phy systems will reference the PHY closest to the MAC. Introduce a numbering scheme allowing to enumerate PHY devices that belong to any netdev, which can in turn allow userspace to take more precise decisions with regard to each PHY's configuration. The numbering is maintained per-netdev, in a phy_device_list. The numbering works similarly to a netdevice's ifindex, with identifiers that are only recycled once INT_MAX has been reached. This prevents races that could occur between PHY listing and SFP transceiver removal/insertion. The identifiers are assigned at phy_attach time, as the numbering depends on the netdevice the phy is attached to. The PHY index can be re-used for PHYs that are persistent. Signed-off-by: Maxime Chevallier Reviewed-by: Christophe Leroy Tested-by: Christophe Leroy Signed-off-by: David S. Miller --- MAINTAINERS | 1 + drivers/net/phy/Makefile | 2 +- drivers/net/phy/phy_device.c | 6 +++ drivers/net/phy/phy_link_topology.c | 105 ++++++++++++++++++++++++++++++++++++ include/linux/netdevice.h | 4 +- include/linux/phy.h | 4 ++ include/linux/phy_link_topology.h | 82 ++++++++++++++++++++++++++++ include/uapi/linux/ethtool.h | 16 ++++++ net/core/dev.c | 15 ++++++ 9 files changed, 233 insertions(+), 2 deletions(-) create mode 100644 drivers/net/phy/phy_link_topology.c create mode 100644 include/linux/phy_link_topology.h (limited to 'include/linux/netdevice.h') diff --git a/MAINTAINERS b/MAINTAINERS index 5f45b75eba96..e4fa9010fcb6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8341,6 +8341,7 @@ F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h +F: include/linux/phy_link_topology.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/linux/platform_data/mdio-gpio.h diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index f086a606a87e..33adb3df5f64 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux PHY drivers libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ - linkmode.o + linkmode.o phy_link_topology.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 159e71c1c972..d896c799f7c2 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1526,6 +1527,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (phydev->sfp_bus_attached) dev->sfp_bus = phydev->sfp_bus; + + err = phy_link_topo_add_phy(dev, phydev, PHY_UPSTREAM_MAC, dev); + if (err) + goto error; } /* Some Ethernet drivers try to connect to a PHY device before @@ -1953,6 +1958,7 @@ void phy_detach(struct phy_device *phydev) if (dev) { phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; + phy_link_topo_del_phy(dev, phydev); } phydev->phylink = NULL; diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c new file mode 100644 index 000000000000..4a5d73002a1a --- /dev/null +++ b/drivers/net/phy/phy_link_topology.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Infrastructure to handle all PHY devices connected to a given netdev, + * either directly or indirectly attached. + * + * Copyright (c) 2023 Maxime Chevallier + */ + +#include +#include +#include +#include + +static int netdev_alloc_phy_link_topology(struct net_device *dev) +{ + struct phy_link_topology *topo; + + topo = kzalloc(sizeof(*topo), GFP_KERNEL); + if (!topo) + return -ENOMEM; + + xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1); + topo->next_phy_index = 1; + + dev->link_topo = topo; + + return 0; +} + +int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + int ret; + + if (!topo) { + ret = netdev_alloc_phy_link_topology(dev); + if (ret) + return ret; + + topo = dev->link_topo; + } + + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); + if (!pdn) + return -ENOMEM; + + pdn->phy = phy; + switch (upt) { + case PHY_UPSTREAM_MAC: + pdn->upstream.netdev = (struct net_device *)upstream; + if (phy_on_sfp(phy)) + pdn->parent_sfp_bus = pdn->upstream.netdev->sfp_bus; + break; + case PHY_UPSTREAM_PHY: + pdn->upstream.phydev = (struct phy_device *)upstream; + if (phy_on_sfp(phy)) + pdn->parent_sfp_bus = pdn->upstream.phydev->sfp_bus; + break; + default: + ret = -EINVAL; + goto err; + } + pdn->upstream_type = upt; + + /* Attempt to re-use a previously allocated phy_index */ + if (phy->phyindex) + ret = xa_insert(&topo->phys, phy->phyindex, pdn, GFP_KERNEL); + else + ret = xa_alloc_cyclic(&topo->phys, &phy->phyindex, pdn, + xa_limit_32b, &topo->next_phy_index, + GFP_KERNEL); + + if (ret) + goto err; + + return 0; + +err: + kfree(pdn); + return ret; +} +EXPORT_SYMBOL_GPL(phy_link_topo_add_phy); + +void phy_link_topo_del_phy(struct net_device *dev, + struct phy_device *phy) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + + if (!topo) + return; + + pdn = xa_erase(&topo->phys, phy->phyindex); + + /* We delete the PHY from the topology, however we don't re-set the + * phy->phyindex field. If the PHY isn't gone, we can re-assign it the + * same index next time it's added back to the topology + */ + + kfree(pdn); +} +EXPORT_SYMBOL_GPL(phy_link_topo_del_phy); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614ec5d3d75b..289a6133769c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include #endif #include - #include #include #include @@ -81,6 +80,7 @@ struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; +struct phy_link_topology; typedef u32 xdp_features_t; @@ -1951,6 +1951,7 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one + * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2342,6 +2343,7 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif + struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b7d40d49129..3b0f4bf80650 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -554,6 +554,9 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. + * @phyindex: Unique id across the phy's parent tree of phys to address the PHY + * from userspace, similar to ifindex. A zero index means the PHY + * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -656,6 +659,7 @@ struct phy_device { struct device_link *devlink; + u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h new file mode 100644 index 000000000000..68a59e25821c --- /dev/null +++ b/include/linux/phy_link_topology.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PHY device list allow maintaining a list of PHY devices that are + * part of a netdevice's link topology. PHYs can for example be chained, + * as is the case when using a PHY that exposes an SFP module, on which an + * SFP transceiver that embeds a PHY is connected. + * + * This list can then be used by userspace to leverage individual PHY + * capabilities. + */ +#ifndef __PHY_LINK_TOPOLOGY_H +#define __PHY_LINK_TOPOLOGY_H + +#include +#include + +struct xarray; +struct phy_device; +struct sfp_bus; + +struct phy_link_topology { + struct xarray phys; + u32 next_phy_index; +}; + +struct phy_device_node { + enum phy_upstream upstream_type; + + union { + struct net_device *netdev; + struct phy_device *phydev; + } upstream; + + struct sfp_bus *parent_sfp_bus; + + struct phy_device *phy; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream); + +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + + if (!topo) + return NULL; + + pdn = xa_load(&topo->phys, phyindex); + if (pdn) + return pdn->phy; + + return NULL; +} + +#else +static inline int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + return 0; +} + +static inline void phy_link_topo_del_phy(struct net_device *dev, + struct phy_device *phy) +{ +} + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + return NULL; +} +#endif + +#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4a0a6e703483..c405ed63acfa 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2533,4 +2533,20 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; + +/** + * enum phy_upstream - Represents the upstream component a given PHY device + * is connected to, as in what is on the other end of the MII bus. Most PHYs + * will be attached to an Ethernet MAC controller, but in some cases, there's + * an intermediate PHY used as a media-converter, which will driver another + * MII interface as its output. + * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, + * or ethernet controller) + * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) + */ +enum phy_upstream { + PHY_UPSTREAM_MAC, + PHY_UPSTREAM_PHY, +}; + #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/net/core/dev.c b/net/core/dev.c index e7260889d4cb..d0e7483abdcd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -158,6 +158,7 @@ #include #include #include +#include #include "dev.h" #include "net-sysfs.h" @@ -10321,6 +10322,17 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev) } } +static void netdev_free_phy_link_topology(struct net_device *dev) +{ + struct phy_link_topology *topo = dev->link_topo; + + if (IS_ENABLED(CONFIG_PHYLIB) && topo) { + xa_destroy(&topo->phys); + kfree(topo); + dev->link_topo = NULL; + } +} + /** * register_netdevice() - register a network device * @dev: device to register @@ -11099,6 +11111,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif + dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -11191,6 +11204,8 @@ void free_netdev(struct net_device *dev) free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL; + netdev_free_phy_link_topology(dev); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { -- cgit v1.2.3 From 7d3aed652d090508990d245f9d80dcc481910d02 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Thu, 22 Aug 2024 05:51:54 +0000 Subject: net: refactor ->ndo_bpf calls into dev_xdp_propagate When net devices propagate xdp configurations to slave devices, we will need to perform a memory provider check to ensure we're not binding xdp to a device using unreadable netmem. Currently the ->ndo_bpf calls in a few places. Adding checks to all these places would not be ideal. Refactor all the ->ndo_bpf calls into one place where we can add this check in the future. Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 8 ++++---- drivers/net/hyperv/netvsc_bpf.c | 2 +- include/linux/netdevice.h | 1 + net/core/dev.c | 9 +++++++++ 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 96f5470a5f55..a3b6e6c696b4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2253,7 +2253,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_sysfs_del; } - res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + res = dev_xdp_propagate(slave_dev, &xdp); if (res < 0) { /* ndo_bpf() sets extack error message */ slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); @@ -2389,7 +2389,7 @@ static int __bond_release_one(struct net_device *bond_dev, .prog = NULL, .extack = NULL, }; - if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + if (dev_xdp_propagate(slave_dev, &xdp)) slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); } @@ -5579,7 +5579,7 @@ static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, goto err; } - err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + err = dev_xdp_propagate(slave_dev, &xdp); if (err < 0) { /* ndo_bpf() sets extack error message */ slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); @@ -5611,7 +5611,7 @@ err: if (slave == rollback_slave) break; - err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + err_unwind = dev_xdp_propagate(slave_dev, &xdp); if (err_unwind < 0) slave_err(dev, slave_dev, "Error %d when unwinding XDP program change\n", err_unwind); diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 4a9522689fa4..e01c5997a551 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -183,7 +183,7 @@ int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) xdp.command = XDP_SETUP_PROG; xdp.prog = prog; - ret = vf_netdev->netdev_ops->ndo_bpf(vf_netdev, &xdp); + ret = dev_xdp_propagate(vf_netdev, &xdp); if (ret && prog) bpf_prog_put(prog); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289a6133769c..7dfa836d9dbf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3925,6 +3925,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index d0e7483abdcd..271d6aa0cfb9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9370,6 +9370,15 @@ u8 dev_xdp_prog_count(struct net_device *dev) } EXPORT_SYMBOL_GPL(dev_xdp_prog_count); +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) +{ + if (!dev->netdev_ops->ndo_bpf) + return -EOPNOTSUPP; + + return dev->netdev_ops->ndo_bpf(dev, bpf); +} +EXPORT_SYMBOL_GPL(dev_xdp_propagate); + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); -- cgit v1.2.3 From 70d0bb45fae87a3b08970a318e15f317446a1956 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 22 Aug 2024 13:57:33 +0100 Subject: net: Correct spelling in headers Correct spelling in Networking headers. As reported by codespell. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20240822-net-spell-v1-12-3a98971ce2d2@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 2 +- include/linux/netdevice.h | 8 ++++---- include/net/addrconf.h | 2 +- include/net/busy_poll.h | 2 +- include/net/caif/caif_layer.h | 4 ++-- include/net/caif/cfpkt.h | 2 +- include/net/dropreason-core.h | 6 +++--- include/net/dst.h | 2 +- include/net/dst_cache.h | 2 +- include/net/erspan.h | 4 ++-- include/net/hwbm.h | 4 ++-- include/net/llc_pdu.h | 2 +- include/net/netlink.h | 16 ++++++++-------- include/net/netns/sctp.h | 4 ++-- include/net/regulatory.h | 2 +- include/net/sock.h | 4 ++-- include/net/udp.h | 2 +- include/uapi/linux/in.h | 2 +- include/uapi/linux/inet_diag.h | 2 +- 19 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0ed47d00549b..30114c25ad12 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -645,7 +645,7 @@ static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb) } /** - * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * eth_skb_pad - Pad buffer to minimum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. This function diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7dfa836d9dbf..fce70990b209 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1237,7 +1237,7 @@ struct netdev_net_notifier { * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid) - * Deletes the FDB entry from dev coresponding to addr. + * Deletes the FDB entry from dev corresponding to addr. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -3514,7 +3514,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, dql_completed(&dev_queue->dql, bytes); /* - * Without the memory barrier there is a small possiblity that + * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ @@ -4583,7 +4583,7 @@ void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** - * __dev_uc_sync - Synchonize device's unicast list + * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed @@ -4627,7 +4627,7 @@ void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** - * __dev_mc_sync - Synchonize device's multicast list + * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c8ed31828db3..363dd63babe7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -333,7 +333,7 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) /** * __in6_dev_stats_get - get inet6_dev pointer for stats * @dev: network device - * @skb: skb for original incoming interface if neeeded + * @skb: skb for original incoming interface if needed * * Caller must hold rcu_read_lock or RTNL, because this function * does not take a reference on the inet6_dev. diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 9b09acac538e..5e3b5703e4ab 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -131,7 +131,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, #endif } -/* used in the protocol hanlder to propagate the napi_id to the socket */ +/* used in the protocol handler to propagate the napi_id to the socket */ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 0f45d875905f..053e7c6a6a66 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -20,7 +20,7 @@ struct caif_payload_info; * @assert: expression to evaluate. * * This function will print a error message and a do WARN_ON if the - * assertion failes. Normally this will do a stack up at the current location. + * assertion fails. Normally this will do a stack up at the current location. */ #define caif_assert(assert) \ do { \ @@ -116,7 +116,7 @@ enum caif_direction { * @dn: Pointer down to the layer below. * @node: List node used when layer participate in a list. * @receive: Packet receive function. - * @transmit: Packet transmit funciton. + * @transmit: Packet transmit function. * @ctrlcmd: Used for control signalling upwards in the stack. * @modemcmd: Used for control signaling downwards in the stack. * @id: The identity of this layer diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 44d914a50369..acf664227d96 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -18,7 +18,7 @@ struct cfpkt *cfpkt_create(u16 len); /* * Destroy a CAIF Packet. - * pkt Packet to be destoyed. + * pkt Packet to be destroyed. */ void cfpkt_destroy(struct cfpkt *pkt); diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 9707ab54fdd5..4748680e8c88 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -155,8 +155,8 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ SKB_DROP_REASON_SOCKET_RCVBUFF, /** - * @SKB_DROP_REASON_PROTO_MEM: proto memory limition, such as udp packet - * drop out of udp_memory_allocated. + * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as + * udp packet drop out of udp_memory_allocated. */ SKB_DROP_REASON_PROTO_MEM, /** @@ -217,7 +217,7 @@ enum skb_drop_reason { */ SKB_DROP_REASON_TCP_ZEROWINDOW, /** - * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data reveived is already + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received is already * received before (spurious retrans may happened), see * LINUX_MIB_DELAYEDACKLOST */ diff --git a/include/net/dst.h b/include/net/dst.h index 0aa331bd2fdb..0f303cc60252 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -341,7 +341,7 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; /* - * Clear hash so that we can recalulate the hash for the + * Clear hash so that we can recalculate the hash for the * encapsulated packet, unless we have already determine the hash * over the L4 4-tuple. */ diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index b4a55d2d5e71..1961699598e2 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -102,7 +102,7 @@ int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp); * @dst_cache: the cache * * No synchronization is enforced: it must be called only when the cache - * is unsed. + * is unused. */ void dst_cache_destroy(struct dst_cache *dst_cache); diff --git a/include/net/erspan.h b/include/net/erspan.h index 6cb4cbd6a48f..c6209e7b6c96 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -89,7 +89,7 @@ enum erspan_encap_type { ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */ - ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag perserved in frame */ + ERSPAN_ENCAP_INFRAME = 0x3, /* VLAN tag preserved in frame */ }; #define ERSPAN_V1_MDSIZE 4 @@ -192,7 +192,7 @@ static inline void erspan_build_header(struct sk_buff *skb, enc_type = ERSPAN_ENCAP_NOVLAN; /* If mirrored packet has vlan tag, extract tci and - * perserve vlan header in the mirrored frame. + * preserve vlan header in the mirrored frame. */ if (eth->h_proto == htons(ETH_P_8021Q)) { qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); diff --git a/include/net/hwbm.h b/include/net/hwbm.h index aa495decec35..bdbe91c609ff 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -11,9 +11,9 @@ struct hwbm_pool { int frag_size; /* Number of buffers currently used by this pool */ int buf_num; - /* constructor called during alocation */ + /* constructor called during allocation */ int (*construct)(struct hwbm_pool *bm_pool, void *buf); - /* protect acces to the buffer counter*/ + /* protect access to the buffer counter*/ struct mutex buf_lock; /* private data */ void *priv; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 1d55ba7c45be..86681f29bda7 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -254,7 +254,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, } /** - * llc_pdu_decode_sa - extracs source address (MAC) of input frame + * llc_pdu_decode_sa - extracts, source address (MAC) of input frame * @skb: input skb that source address must be extracted from it. * @sa: pointer to source address (6 byte array). * diff --git a/include/net/netlink.h b/include/net/netlink.h index e78ce008e07c..db6af207287c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -827,7 +827,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, /** * nlmsg_find_attr - find a specific attribute in a netlink message * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * * Returns the first attribute which matches the specified type. @@ -849,7 +849,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in liberal mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ @@ -872,7 +872,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * * Validates all attributes in the specified attribute stream against the * specified policy. Validation is done in strict mode. - * See documenation of struct nla_policy for more details. + * See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ @@ -887,7 +887,7 @@ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, /** * nlmsg_validate_deprecated - validate a netlink message including attributes * @nlh: netlinket message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @maxtype: maximum attribute type to be expected * @policy: validation policy * @extack: extended ACK report struct @@ -933,7 +933,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header - * @hdrlen: length of familiy specific header + * @hdrlen: length of family specific header * @rem: initialized to len, holds bytes currently remaining in stream */ #define nlmsg_for_each_attr(pos, nlh, hdrlen, rem) \ @@ -1034,7 +1034,7 @@ static inline struct sk_buff *nlmsg_new_large(size_t payload) * @skb: socket buffer the message is stored in * @nlh: netlink message header * - * Corrects the netlink message header to include the appeneded + * Corrects the netlink message header to include the appended * attributes. Only necessary if attributes have been added to * the message. */ @@ -1954,7 +1954,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) * @start: container attribute * * Corrects the container attribute header to include the all - * appeneded attributes. + * appended attributes. * * Returns the total data length of the skb. */ @@ -1987,7 +1987,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Validates all attributes in the nested attribute stream against the * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. + * ignored. See documentation of struct nla_policy for more details. * * Returns 0 on success or a negative error code. */ diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 7eff3d981b89..d25cd7a9c5ff 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -125,14 +125,14 @@ struct netns_sctp { int pf_expose; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes */ int sndbuf_policy; /* - * Policy for preforming sctp/socket accounting + * Policy for performing sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_rcvbuf * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes */ diff --git a/include/net/regulatory.h b/include/net/regulatory.h index a103f4c8cf75..6633627f6e76 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -121,7 +121,7 @@ struct regulatory_request { * @REGULATORY_DISABLE_BEACON_HINTS: enable this if your driver needs to * ensure that passive scan flags and beaconing flags may not be lifted by * cfg80211 due to regulatory beacon hints. For more information on beacon - * hints read the documenation for regulatory_hint_found_beacon() + * hints read the documentation for regulatory_hint_found_beacon() * @REGULATORY_COUNTRY_IE_FOLLOW_POWER: for devices that have a preference * that even though they may have programmed their own custom power * setting prior to wiphy registration, they want to ensure their channel diff --git a/include/net/sock.h b/include/net/sock.h index cce23ac4d514..f51d61fab059 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1624,7 +1624,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); * lock_sock_fast - fast version of lock_sock * @sk: socket * - * This version should be used for very small section, where process wont block + * This version should be used for very small section, where process won't block * return false if fast path is taken: * * sk_lock.slock locked, owned = 0, BH disabled @@ -2546,7 +2546,7 @@ struct sock_skb_cb { /* Store sock_skb_cb at the end of skb->cb[] so protocol families * using skb->cb[] would keep using it directly and utilize its - * alignement guarantee. + * alignment guarantee. */ #define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \ sizeof(struct sock_skb_cb))) diff --git a/include/net/udp.h b/include/net/udp.h index 5ca53b1cec67..61222545ab1c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -232,7 +232,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, } /* Since this is being sent on the wire obfuscate hash a bit - * to minimize possbility that any useful information to an + * to minimize possibility that any useful information to an * attacker is leaked. Only upper 16 bits are relevant in the * computation for 16 bit port value. */ diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 50655de04c9b..86bb2e8b17c9 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,7 +143,7 @@ enum { INET_DIAG_SHUTDOWN, /* - * Next extenstions cannot be requested in struct inet_diag_req_v2: + * Next extensions cannot be requested in struct inet_diag_req_v2: * its field idiag_ext has only 8 bits. */ -- cgit v1.2.3 From beb5a9bea8239cdf4adf6b62672e30db3e9fa5ce Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:36 +0200 Subject: netdevice: convert private flags > BIT(31) to bitfields Make dev->priv_flags `u32` back and define bits higher than 31 as bitfield booleans as per Jakub's suggestion. This simplifies code which accesses these bits with no optimization loss (testb both before/after), allows to not extend &netdev_priv_flags each time, but also scales better as bits > 63 in the future would only add a new u64 to the structure with no complications, comparing to that extending ::priv_flags would require converting it to a bitmap. Note that I picked `unsigned long :1` to not lose any potential optimizations comparing to `bool :1` etc. Suggested-by: Jakub Kicinski Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- .../networking/net_cachelines/net_device.rst | 4 +++- .../net/ethernet/microchip/lan966x/lan966x_main.c | 2 +- drivers/net/macvlan.c | 3 ++- drivers/net/vxlan/vxlan_core.c | 3 ++- include/linux/netdevice.h | 28 ++++++++++++++-------- net/8021q/vlanproc.c | 4 ++-- net/core/dev.c | 4 ++-- net/core/dev_ioctl.c | 9 ++++--- net/core/rtnetlink.c | 2 +- 9 files changed, 35 insertions(+), 24 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index a0e0fab8161a..1fe3cdfe3582 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -7,6 +7,7 @@ net_device struct fast path usage breakdown Type Name fastpath_tx_access fastpath_rx_access Comments ..struct ..net_device +unsigned_long:32 priv_flags read_mostly - __dev_queue_xmit(tx) char name[16] - - struct_netdev_name_node* name_node struct_dev_ifalias* ifalias @@ -23,7 +24,6 @@ struct_list_head ptype_specific struct adj_list unsigned_int flags read_mostly read_mostly __dev_queue_xmit,__dev_xmit_skb,ip6_output,__ip6_finish_output(tx);ip6_rcv_core(rx) xdp_features_t xdp_features -unsigned_long_long priv_flags read_mostly - __dev_queue_xmit(tx) struct_net_device_ops* netdev_ops read_mostly - netdev_core_pick_tx,netdev_start_xmit(tx) struct_xdp_metadata_ops* xdp_metadata_ops int ifindex - read_mostly ip6_rcv_core @@ -163,6 +163,8 @@ struct_lock_class_key* qdisc_tx_busylock bool proto_down unsigned:1 wol_enabled unsigned:1 threaded - - napi_poll(napi_enable,dev_set_threaded) +unsigned_long:1 see_all_hwtstamp_requests +unsigned_long:1 change_proto_down struct_list_head net_notifier_list struct_macsec_ops* macsec_ops struct_udp_tunnel_nic_info* udp_tunnel_nic_info diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index ec672af12e25..534d4716d5f7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -816,7 +816,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC; - dev->priv_flags |= IFF_SEE_ALL_HWTSTAMP_REQUESTS; + dev->see_all_hwtstamp_requests = true; dev->needed_headroom = IFH_LEN_BYTES; eth_hw_addr_gen(dev, lan966x->base_mac, p + 1); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 24298a33e0e9..b45f137f365e 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1213,7 +1213,8 @@ void macvlan_common_setup(struct net_device *dev) dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~IFF_TX_SKB_SHARING; netif_keep_dst(dev); - dev->priv_flags |= IFF_UNICAST_FLT | IFF_CHANGE_PROTO_DOWN; + dev->priv_flags |= IFF_UNICAST_FLT; + dev->change_proto_down = true; dev->netdev_ops = &macvlan_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = macvlan_dev_free; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 34391c18bba7..4a54dd1950c1 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3331,7 +3331,8 @@ static void vxlan_setup(struct net_device *dev) dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; netif_keep_dst(dev); - dev->priv_flags |= IFF_NO_QUEUE | IFF_CHANGE_PROTO_DOWN; + dev->priv_flags |= IFF_NO_QUEUE; + dev->change_proto_down = true; /* MTU range: 68 - 65535 */ dev->min_mtu = ETH_MIN_MTU; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fce70990b209..d6f35c9d8580 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1613,7 +1613,8 @@ struct net_device_ops { * userspace; this means that the order of these flags can change * during any kernel release. * - * You should have a pretty good reason to be extending these flags. + * You should add bitfield booleans after either net_device::priv_flags + * (hotpath) or ::threaded (slowpath) instead of extending these flags. * * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device @@ -1652,10 +1653,6 @@ struct net_device_ops { * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) - * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN - * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to - * ndo_hwtstamp_set() for all timestamp requests regardless of source, - * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1690,8 +1687,6 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), - IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; /* Specifies the type of the struct net_device::ml_priv pointer */ @@ -1723,6 +1718,9 @@ enum netdev_reg_state { * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * + * @priv_flags: flags invisible to userspace defined as bits, see + * enum netdev_priv_flags for the definitions + * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. @@ -1789,8 +1787,6 @@ enum netdev_reg_state { * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device - * @priv_flags: Like 'flags' but invisible to userspace, - * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data @@ -1964,6 +1960,12 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @see_all_hwtstamp_requests: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests + * regardless of source, even if those aren't + * HWTSTAMP_SOURCE_NETDEV + * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2014,7 +2016,9 @@ struct net_device { /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); - unsigned long long priv_flags; + struct_group(priv_flags_fast, + unsigned long priv_flags:32; + ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; @@ -2350,6 +2354,10 @@ struct net_device { bool proto_down; bool threaded; + /* priv_flags_slow, ungrouped to save space */ + unsigned long see_all_hwtstamp_requests:1; + unsigned long change_proto_down:1; + struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 87b959da00cd..fa67374bda49 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -238,9 +238,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) stats = dev_get_stats(vlandev, &temp); seq_printf(seq, - "%s VID: %d REORDER_HDR: %i dev->priv_flags: %llx\n", + "%s VID: %d REORDER_HDR: %i dev->priv_flags: %x\n", vlandev->name, vlan->vlan_id, - (int)(vlan->flags & 1), vlandev->priv_flags); + (int)(vlan->flags & 1), (u32)vlandev->priv_flags); seq_printf(seq, fmt64, "total frames received", stats->rx_packets); seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes); diff --git a/net/core/dev.c b/net/core/dev.c index 932d67b975f5..56aed88ceadf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9274,7 +9274,7 @@ EXPORT_SYMBOL(netdev_port_same_parent_id); */ int dev_change_proto_down(struct net_device *dev, bool proto_down) { - if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) + if (!dev->change_proto_down) return -EOPNOTSUPP; if (!netif_device_present(dev)) return -ENODEV; @@ -11930,7 +11930,7 @@ static struct pernet_operations __net_initdata default_device_ops = { static void __init net_dev_struct_check(void) { /* TX read-mostly hotpath */ - CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags_fast); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 8592c052c0f4..473c437b6b53 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -317,8 +317,7 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) * should take precedence in front of hardware timestamping provided by the * netdev. If the netdev driver needs to perform specific actions even for PHY * timestamping to work properly (a switch port must trap the timestamped - * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in - * dev->priv_flags. + * frames and not forward them), it must set dev->see_all_hwtstamp_requests. */ int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, @@ -332,13 +331,13 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; - if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { + if (phy_ts && dev->see_all_hwtstamp_requests) { err = ops->ndo_hwtstamp_get(dev, &old_cfg); if (err) return err; } - if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { + if (!phy_ts || dev->see_all_hwtstamp_requests) { err = ops->ndo_hwtstamp_set(dev, cfg, extack); if (err) { if (extack->_msg) @@ -347,7 +346,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, } } - if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) + if (phy_ts && dev->see_all_hwtstamp_requests) changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cd9487a12d1a..f0a520987085 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2724,7 +2724,7 @@ static int do_set_proto_down(struct net_device *dev, bool proto_down; int err; - if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) { + if (!dev->change_proto_down) { NL_SET_ERR_MSG(extack, "Protodown not supported by device"); return -EOPNOTSUPP; } -- cgit v1.2.3 From 00d066a4d4edbe559ba6c35153da71d4b2b8a383 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:37 +0200 Subject: netdev_features: convert NETIF_F_LLTX to dev->lltx NETIF_F_LLTX can't be changed via Ethtool and is not a feature, rather an attribute, very similar to IFF_NO_QUEUE (and hot). Free one netdev_features_t bit and make it a "hot" private flag. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- Documentation/networking/net_cachelines/net_device.rst | 1 + Documentation/networking/netdev-features.rst | 8 -------- Documentation/networking/netdevices.rst | 4 ++-- drivers/net/amt.c | 2 +- drivers/net/bareudp.c | 2 +- drivers/net/bonding/bond_main.c | 2 +- drivers/net/dummy.c | 3 ++- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 3 ++- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 3 ++- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3 ++- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 ++- drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 3 +-- drivers/net/ethernet/pasemi/pasemi_mac.c | 5 +++-- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 2 +- drivers/net/ethernet/sfc/ef100_rep.c | 4 ++-- drivers/net/ethernet/tehuti/tehuti.c | 4 ++-- drivers/net/ethernet/tehuti/tehuti.h | 2 +- drivers/net/ethernet/toshiba/spider_net.c | 3 ++- drivers/net/geneve.c | 2 +- drivers/net/gtp.c | 2 +- drivers/net/hamradio/bpqether.c | 2 +- drivers/net/ipvlan/ipvlan_main.c | 3 ++- drivers/net/loopback.c | 2 +- drivers/net/macsec.c | 4 ++-- drivers/net/macvlan.c | 3 ++- drivers/net/net_failover.c | 2 +- drivers/net/netkit.c | 3 ++- drivers/net/nlmon.c | 4 ++-- drivers/net/ppp/ppp_generic.c | 2 +- drivers/net/rionet.c | 2 +- drivers/net/team/team_core.c | 2 +- drivers/net/tun.c | 5 +++-- drivers/net/veth.c | 2 +- drivers/net/vrf.c | 2 +- drivers/net/vsockmon.c | 4 ++-- drivers/net/vxlan/vxlan_core.c | 2 +- drivers/net/wireguard/device.c | 2 +- drivers/staging/octeon/ethernet.c | 2 +- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 10 +++++++--- lib/test_bpf.c | 3 +-- net/8021q/vlan_dev.c | 4 ++-- net/batman-adv/soft-interface.c | 2 +- net/bridge/br_device.c | 3 ++- net/core/net-sysfs.c | 3 +-- net/dsa/user.c | 3 ++- net/ethtool/common.c | 1 - net/hsr/hsr_device.c | 4 ++-- net/ipv4/ip_gre.c | 4 +++- net/ipv4/ip_vti.c | 2 +- net/ipv4/ipip.c | 2 +- net/ipv6/ip6_gre.c | 4 +++- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/sit.c | 2 +- net/l2tp/l2tp_eth.c | 2 +- net/openvswitch/vport-internal_dev.c | 9 +++++---- net/xfrm/xfrm_interface_core.c | 2 +- 57 files changed, 93 insertions(+), 84 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 1fe3cdfe3582..e55319277074 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -8,6 +8,7 @@ net_device struct fast path usage breakdown Type Name fastpath_tx_access fastpath_rx_access Comments ..struct ..net_device unsigned_long:32 priv_flags read_mostly - __dev_queue_xmit(tx) +unsigned_long:1 lltx read_mostly - HARD_TX_LOCK,HARD_TX_TRYLOCK,HARD_TX_UNLOCK(tx) char name[16] - - struct_netdev_name_node* name_node struct_dev_ifalias* ifalias diff --git a/Documentation/networking/netdev-features.rst b/Documentation/networking/netdev-features.rst index d7b15bb64deb..f29d982ebf5d 100644 --- a/Documentation/networking/netdev-features.rst +++ b/Documentation/networking/netdev-features.rst @@ -139,14 +139,6 @@ chained skbs (skb->next/prev list). Features contained in NETIF_F_SOFT_FEATURES are features of networking stack. Driver should not change behaviour based on them. - * LLTX driver (deprecated for hardware drivers) - -NETIF_F_LLTX is meant to be used by drivers that don't need locking at all, -e.g. software tunnels. - -This is also used in a few legacy drivers that implement their -own locking, don't use it for new (hardware) drivers. - * netns-local device NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index c2476917a6c3..857c9784f87e 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -258,11 +258,11 @@ ndo_get_stats: ndo_start_xmit: Synchronization: __netif_tx_lock spinlock. - When the driver sets NETIF_F_LLTX in dev->features this will be + When the driver sets dev->lltx this will be called without holding netif_tx_lock. In this case the driver has to lock by itself when needed. The locking there should also properly protect against - set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated. + set_rx_mode. WARNING: use of dev->lltx is deprecated. Don't use it for new drivers. Context: Process with BHs disabled or BH (timer), diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 6d15ab3bfbbc..921bbfd72a38 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -3098,7 +3098,7 @@ static void amt_link_setup(struct net_device *dev) dev->hard_header_len = 0; dev->addr_len = 0; dev->priv_flags |= IFF_NO_QUEUE; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_NETNS_LOCAL; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM; diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index d5c56ca91b77..6f4de883e872 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -553,7 +553,6 @@ static void bareudp_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &bareudp_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; - dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->hw_features |= NETIF_F_RXCSUM; @@ -566,6 +565,7 @@ static void bareudp_setup(struct net_device *dev) dev->type = ARPHRD_NONE; netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; + dev->lltx = true; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index cb6a694313d5..8ae887cdc231 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5928,7 +5928,7 @@ void bond_setup(struct net_device *bond_dev) #endif /* CONFIG_XFRM_OFFLOAD */ /* don't acquire bond device's netif_tx_lock when transmitting */ - bond_dev->features |= NETIF_F_LLTX; + bond_dev->lltx = true; /* By default, we declare the bond to be fully * VLAN hardware accelerated capable. Special diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d29b5d7af0d7..e9c5e1e11fa0 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -109,9 +109,10 @@ static void dummy_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + dev->lltx = true; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST; dev->features |= NETIF_F_GSO_SOFTWARE; - dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA; dev->features |= NETIF_F_GSO_ENCAP_ALL; dev->hw_features |= dev->features; dev->hw_enc_features |= dev->features; diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 7d7d3e0098df..3b7068832f95 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -1034,7 +1034,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | NETIF_F_LLTX | NETIF_F_HIGHDMA; + NETIF_F_RXCSUM | NETIF_F_HIGHDMA; + netdev->lltx = true; if (vlan_tso_capable(adapter)) { netdev->features |= diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 5d99cfb4e994..1d0208b5db7d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -229,7 +229,7 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->max_mtu = dpaa_get_max_mtu(); net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_LLTX | NETIF_F_RXHASH); + NETIF_F_RXHASH); net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA; /* The kernels enables GSO automatically, if we declare NETIF_F_SG. @@ -239,6 +239,7 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->features |= NETIF_F_RXCSUM; net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + net_dev->lltx = true; /* we do not want shared skbs on TX */ net_dev->priv_flags &= ~IFF_TX_SKB_SHARING; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 6866807973da..29886a8ba73f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4594,12 +4594,13 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) net_dev->priv_flags |= supported; net_dev->priv_flags &= ~not_supported; + net_dev->lltx = true; /* Features */ net_dev->features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | - NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; + NETIF_F_HW_TC | NETIF_F_TSO; net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f064789f3240..44d6e125bd6f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1676,9 +1676,10 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | + dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + dev->lltx = true; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index eee0bfc41074..227e7a5d712e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -248,7 +248,6 @@ nfp_repr_fix_features(struct net_device *netdev, netdev_features_t features) features = netdev_intersect_features(features, lower_features); features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_HW_TC); - features |= NETIF_F_LLTX; return features; } @@ -386,7 +385,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; - netdev->features |= NETIF_F_LLTX; + netdev->lltx = true; if (nfp_app_has_tc(app)) { netdev->features |= NETIF_F_HW_TC; diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 62ba269da902..cb4e12df7719 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1699,8 +1699,9 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &mac->napi, pasemi_mac_poll); - dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HIGHDMA | NETIF_F_GSO; + dev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | + NETIF_F_GSO; + dev->lltx = true; mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); if (!mac->dma_pdev) { diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index f1e40aade127..4f0ddcedfa97 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -286,7 +286,7 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev) rmnet_dev->needs_free_netdev = true; rmnet_dev->ethtool_ops = &rmnet_ethtool_ops; - rmnet_dev->features |= NETIF_F_LLTX; + rmnet_dev->lltx = true; /* This perm addr will be used as interface identifier by IPv6 */ rmnet_dev->addr_assign_type = NET_ADDR_RANDOM; diff --git a/drivers/net/ethernet/sfc/ef100_rep.c b/drivers/net/ethernet/sfc/ef100_rep.c index 0b3083ef0ead..e923e1796369 100644 --- a/drivers/net/ethernet/sfc/ef100_rep.c +++ b/drivers/net/ethernet/sfc/ef100_rep.c @@ -233,8 +233,8 @@ static struct efx_rep *efx_ef100_rep_create_netdev(struct efx_nic *efx, net_dev->ethtool_ops = &efx_ef100_rep_ethtool_ops; net_dev->min_mtu = EFX_MIN_MTU; net_dev->max_mtu = EFX_MAX_MTU; - net_dev->features |= NETIF_F_LLTX; - net_dev->hw_features |= NETIF_F_LLTX; + net_dev->lltx = true; + return efv; fail1: free_netdev(net_dev); diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index ede5f7890fb4..fc77f424f90b 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1671,7 +1671,7 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, #endif #ifdef BDX_LLTX - netif_trans_update(ndev); /* NETIF_F_LLTX driver :( */ + netif_trans_update(ndev); /* dev->lltx driver :( */ #endif ndev->stats.tx_packets++; ndev->stats.tx_bytes += skb->len; @@ -2019,7 +2019,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * set multicast list callback has to use priv->tx_lock. */ #ifdef BDX_LLTX - ndev->features |= NETIF_F_LLTX; + ndev->lltx = true; #endif /* MTU range: 60 - 16384 */ ndev->min_mtu = ETH_ZLEN; diff --git a/drivers/net/ethernet/tehuti/tehuti.h b/drivers/net/ethernet/tehuti/tehuti.h index 909e7296cecf..47a2d3e5f8ed 100644 --- a/drivers/net/ethernet/tehuti/tehuti.h +++ b/drivers/net/ethernet/tehuti/tehuti.h @@ -260,7 +260,7 @@ struct bdx_priv { int tx_update_mark; int tx_noupd; #endif - spinlock_t tx_lock; /* NETIF_F_LLTX mode */ + spinlock_t tx_lock; /* dev->lltx mode */ /* rarely used */ u8 port; diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 87e67121477c..a4937c18d7cb 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2277,10 +2277,11 @@ spider_net_setup_netdev(struct spider_net_card *card) netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM; if (SPIDER_NET_RX_CSUM_DEFAULT) netdev->features |= NETIF_F_RXCSUM; - netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; + netdev->features |= NETIF_F_IP_CSUM; /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | * NETIF_F_HW_VLAN_CTAG_FILTER */ + netdev->lltx = true; /* MTU range: 64 - 2294 */ netdev->min_mtu = SPIDER_NET_MIN_MTU; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 838e85ddec67..7f611c74eb62 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1194,7 +1194,6 @@ static void geneve_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &geneve_type); - dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -1215,6 +1214,7 @@ static void geneve_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + dev->lltx = true; eth_hw_addr_random(dev); } diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 2e94d10348cc..a60bfb1abb7f 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1356,7 +1356,7 @@ static void gtp_link_setup(struct net_device *dev) dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->priv_flags |= IFF_NO_QUEUE; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; netif_keep_dst(dev); dev->needed_headroom = LL_MAX_HEADER + GTP_IPV4_MAXLEN; diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 83a16d10eedb..bac1bb69d63a 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -458,7 +458,7 @@ static void bpq_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->flags = 0; - dev->features = NETIF_F_LLTX; /* Allow recursion */ + dev->lltx = true; /* Allow recursion */ #if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 094f44dac5c8..ee2c3cf4df36 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -114,7 +114,7 @@ static void ipvlan_port_destroy(struct net_device *dev) NETIF_F_GSO_ROBUST | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL) #define IPVLAN_ALWAYS_ON \ - (IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_LLTX | NETIF_F_VLAN_CHALLENGED) + (IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_VLAN_CHALLENGED) #define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ @@ -141,6 +141,7 @@ static int ipvlan_init(struct net_device *dev) dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES; dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS; dev->hw_enc_features |= dev->features; + dev->lltx = true; netif_inherit_tso_max(dev, phy_dev); dev->hard_header_len = phy_dev->hard_header_len; diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 2b486e7c749c..bf857782be0f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -171,6 +171,7 @@ static void gen_lo_setup(struct net_device *dev, dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ dev->flags = IFF_LOOPBACK; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + dev->lltx = true; netif_keep_dst(dev); dev->hw_features = NETIF_F_GSO_SOFTWARE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST @@ -179,7 +180,6 @@ static void gen_lo_setup(struct net_device *dev, | NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA - | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | NETIF_F_VLAN_CHALLENGED | NETIF_F_LOOPBACK; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 2da70bc3dd86..12d1b205f6d1 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3550,7 +3550,8 @@ static int macsec_dev_init(struct net_device *dev) return err; dev->features = real_dev->features & MACSEC_FEATURES; - dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->lltx = true; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; macsec_set_head_tail_room(dev); @@ -3581,7 +3582,6 @@ static netdev_features_t macsec_fix_features(struct net_device *dev, features &= (real_dev->features & MACSEC_FEATURES) | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; - features |= NETIF_F_LLTX; return features; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index b45f137f365e..cf18e66de142 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -900,7 +900,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \ NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL) -#define ALWAYS_ON_FEATURES (ALWAYS_ON_OFFLOADS | NETIF_F_LLTX) +#define ALWAYS_ON_FEATURES ALWAYS_ON_OFFLOADS #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ @@ -932,6 +932,7 @@ static int macvlan_init(struct net_device *dev) dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; dev->vlan_features |= ALWAYS_ON_OFFLOADS; dev->hw_enc_features |= dev->features; + dev->lltx = true; netif_inherit_tso_max(dev, lowerdev); dev->hard_header_len = lowerdev->hard_header_len; macvlan_set_lockdep_class(dev); diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 963d8b4af28d..06728385a35f 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -731,7 +731,7 @@ struct failover *net_failover_create(struct net_device *standby_dev) IFF_TX_SKB_SHARING); /* don't acquire failover netdev's netif_tx_lock when transmitting */ - failover_dev->features |= NETIF_F_LLTX; + failover_dev->lltx = true; /* Don't allow failover devices to change network namespaces. */ failover_dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 16789cd446e9..79232f5cc088 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -255,11 +255,12 @@ static void netkit_setup(struct net_device *dev) dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; dev->priv_flags |= IFF_PHONY_HEADROOM; dev->priv_flags |= IFF_NO_QUEUE; + dev->lltx = true; dev->ethtool_ops = &netkit_ethtool_ops; dev->netdev_ops = &netkit_netdev_ops; - dev->features |= netkit_features | NETIF_F_LLTX; + dev->features |= netkit_features; dev->hw_features = netkit_features; dev->hw_enc_features = netkit_features; dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index e5a0987a263e..8bfd4ee5a8c4 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -63,13 +63,13 @@ static void nlmon_setup(struct net_device *dev) { dev->type = ARPHRD_NETLINK; dev->priv_flags |= IFF_NO_QUEUE; + dev->lltx = true; dev->netdev_ops = &nlmon_ops; dev->ethtool_ops = &nlmon_ethtool_ops; dev->needs_free_netdev = true; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_LLTX; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; dev->flags = IFF_NOARP; dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index eb9acfcaeb09..4b2971e2bf48 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1631,7 +1631,7 @@ static void ppp_setup(struct net_device *dev) dev->netdev_ops = &ppp_netdev_ops; SET_NETDEV_DEVTYPE(dev, &ppp_type); - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->hard_header_len = PPP_HDRLEN; dev->mtu = PPP_MRU; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 4eececc94513..318a0ef1af50 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -515,7 +515,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) /* MTU range: 68 - 4082 */ ndev->min_mtu = ETH_MIN_MTU; ndev->max_mtu = RIONET_MAX_MTU; - ndev->features = NETIF_F_LLTX; + ndev->lltx = true; SET_NETDEV_DEV(ndev, &mport->dev); ndev->ethtool_ops = &rionet_ethtool_ops; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index ab1935a4aa2c..1d1bad3cedc2 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2189,8 +2189,8 @@ static void team_setup(struct net_device *dev) * Let this up to underlay drivers. */ dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; + dev->lltx = true; - dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GRO; /* Don't allow team devices to change network namespaces. */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1d06c560c5e6..a645c36f2cee 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -990,10 +990,11 @@ static int tun_net_init(struct net_device *dev) dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; + dev->features = dev->hw_features; dev->vlan_features = dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); + dev->lltx = true; tun->flags = (tun->flags & ~TUN_FEATURES) | (ifr->ifr_flags & TUN_FEATURES); @@ -1129,7 +1130,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) goto drop; } - /* NETIF_F_LLTX requires to do our own update of trans_start */ + /* dev->lltx requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); txq_trans_cond_update(queue); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 34499b91a8bd..18148e068aa0 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1697,11 +1697,11 @@ static void veth_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_PHONY_HEADROOM; dev->priv_flags |= IFF_DISABLE_NETPOLL; + dev->lltx = true; dev->netdev_ops = &veth_netdev_ops; dev->xdp_metadata_ops = &veth_xdp_metadata_ops; dev->ethtool_ops = &veth_ethtool_ops; - dev->features |= NETIF_F_LLTX; dev->features |= VETH_FEATURES; dev->vlan_features = dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index a900908eb24a..b1d4ef538995 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1635,7 +1635,7 @@ static void vrf_setup(struct net_device *dev) eth_hw_addr_random(dev); /* don't acquire vrf device's netif_tx_lock when transmitting */ - dev->features |= NETIF_F_LLTX; + dev->lltx = true; /* don't allow vrf devices to change network namespaces. */ dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c index 4c260074c091..53fb76d574c6 100644 --- a/drivers/net/vsockmon.c +++ b/drivers/net/vsockmon.c @@ -83,13 +83,13 @@ static void vsockmon_setup(struct net_device *dev) { dev->type = ARPHRD_VSOCKMON; dev->priv_flags |= IFF_NO_QUEUE; + dev->lltx = true; dev->netdev_ops = &vsockmon_ops; dev->ethtool_ops = &vsockmon_ethtool_ops; dev->needs_free_netdev = true; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_LLTX; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; dev->flags = IFF_NOARP; diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 4a54dd1950c1..53dcb9fffc04 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3321,7 +3321,6 @@ static void vxlan_setup(struct net_device *dev) dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &vxlan_type); - dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; @@ -3333,6 +3332,7 @@ static void vxlan_setup(struct net_device *dev) netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; dev->change_proto_down = true; + dev->lltx = true; /* MTU range: 68 - 65535 */ dev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index 3feb36ee5bfb..45e9b908dbfb 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -289,7 +289,7 @@ static void wg_setup(struct net_device *dev) dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->priv_flags |= IFF_NO_QUEUE; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->features |= WG_NETDEV_FEATURES; dev->hw_features |= WG_NETDEV_FEATURES; dev->hw_enc_features |= WG_NETDEV_FEATURES; diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index 9eee28f2940c..a5e99cc78a45 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -425,7 +425,7 @@ int cvm_oct_common_init(struct net_device *dev) dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; /* We do our own locking, Linux doesn't need to */ - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->ethtool_ops = &cvm_oct_ethtool_ops; cvm_oct_set_mac_filter(dev); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7c2d77d75a88..a2e20b517584 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -24,8 +24,7 @@ enum { NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ - NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ + __UNUSED_NETIF_F_12, NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -120,7 +119,6 @@ enum { #define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) -#define NETIF_F_LLTX __NETIF_F(LLTX) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) #define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) @@ -193,7 +191,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) + NETIF_F_NETNS_LOCAL) /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d6f35c9d8580..e22ca5e07490 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1720,6 +1720,9 @@ enum netdev_reg_state { * * @priv_flags: flags invisible to userspace defined as bits, see * enum netdev_priv_flags for the definitions + * @lltx: device supports lockless Tx. Deprecated for real HW + * drivers. Mainly used by logical interfaces, such as + * bonding and tunnels * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name @@ -2018,6 +2021,7 @@ struct net_device { __cacheline_group_begin(net_device_read_tx); struct_group(priv_flags_fast, unsigned long priv_flags:32; + unsigned long lltx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; @@ -4433,7 +4437,7 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_LOCK(dev, txq, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ @@ -4441,12 +4445,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_TRYLOCK(dev, txq) \ - (((dev->features & NETIF_F_LLTX) == 0) ? \ + (!(dev)->lltx ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ca4b0eea81a2..fa5edd6ef7f7 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -15077,8 +15077,7 @@ static struct skb_segment_test skb_segment_tests[] __initconst = { .build_skb = build_test_skb_linear_no_head_frag, .features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO | - NETIF_F_LLTX | NETIF_F_GRO | - NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | + NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_STAG_TX } }; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 217be32426b5..3ca485537d77 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -569,7 +569,8 @@ static int vlan_dev_init(struct net_device *dev) if (real_dev->vlan_features & NETIF_F_HW_MACSEC) dev->hw_features |= NETIF_F_HW_MACSEC; - dev->features |= dev->hw_features | NETIF_F_LLTX; + dev->features |= dev->hw_features; + dev->lltx = true; netif_inherit_tso_max(dev, real_dev); if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); @@ -655,7 +656,6 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, lower_features |= NETIF_F_HW_CSUM; features = netdev_intersect_features(features, lower_features); features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE); - features |= NETIF_F_LLTX; return features; } diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 30ecbc2ef1fd..e791a73ef901 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1021,8 +1021,8 @@ static void batadv_softif_init_early(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = batadv_softif_free; dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL; - dev->features |= NETIF_F_LLTX; dev->priv_flags |= IFF_NO_QUEUE; + dev->lltx = true; /* can't call min_mtu, because the needed variables * have not been initialized yet diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index fb1115857e49..a6d25113dfb1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -487,8 +487,9 @@ void br_dev_setup(struct net_device *dev) dev->ethtool_ops = &br_ethtool_ops; SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; + dev->lltx = true; - dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + dev->features = COMMON_FEATURES | NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 444f23e74f8e..01a9fb54ef42 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1764,8 +1764,7 @@ static const struct kobj_type netdev_queue_ktype = { static bool netdev_uses_bql(const struct net_device *dev) { - if (dev->features & NETIF_F_LLTX || - dev->priv_flags & IFF_NO_QUEUE) + if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE)) return false; return IS_ENABLED(CONFIG_BQL); diff --git a/net/dsa/user.c b/net/dsa/user.c index f5adfa1d978a..74eda9b30608 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -2642,11 +2642,12 @@ void dsa_user_setup_tagger(struct net_device *user) user->features = conduit->vlan_features | NETIF_F_HW_TC; user->hw_features |= NETIF_F_HW_TC; - user->features |= NETIF_F_LLTX; if (user->needed_tailroom) user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); if (ds->needs_standalone_vlan_filtering) user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + user->lltx = true; } int dsa_user_suspend(struct net_device *user_dev) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7257ae272296..447fc80e1b00 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -25,7 +25,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", - [NETIF_F_LLTX_BIT] = "tx-lockless", [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", [NETIF_F_GRO_BIT] = "rx-gro", [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index e4cc6b78dcfc..d4c783076662 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -554,6 +554,8 @@ void hsr_dev_setup(struct net_device *dev) dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; + /* Prevent recursive tx locking */ + dev->lltx = true; dev->needs_free_netdev = true; @@ -563,8 +565,6 @@ void hsr_dev_setup(struct net_device *dev) dev->features = dev->hw_features; - /* Prevent recursive tx locking */ - dev->features |= NETIF_F_LLTX; /* VLAN on top of HSR needs testing and probably some work on * hsr_header_create() etc. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ba205473522e..b54c41f3ae3c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -996,7 +996,7 @@ static void __gre_tunnel_init(struct net_device *dev) tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph); - dev->features |= GRE_FEATURES | NETIF_F_LLTX; + dev->features |= GRE_FEATURES; dev->hw_features |= GRE_FEATURES; /* TCP offload with GRE SEQ is not supported, nor can we support 2 @@ -1010,6 +1010,8 @@ static void __gre_tunnel_init(struct net_device *dev) dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; + + dev->lltx = true; } static int ipgre_tunnel_init(struct net_device *dev) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 14536da9f5dc..f0b4419cef34 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -443,7 +443,7 @@ static int vti_tunnel_init(struct net_device *dev) dev->flags = IFF_NOARP; dev->addr_len = 4; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; netif_keep_dst(dev); return ip_tunnel_init(dev); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 923a2ef68c2f..dc0db5895e0e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -378,7 +378,7 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL; dev->flags = IFF_NOARP; dev->addr_len = 4; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; netif_keep_dst(dev); dev->features |= IPIP_FEATURES; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3942bd2ade78..08beab638bda 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1471,7 +1471,7 @@ static void ip6gre_tnl_init_features(struct net_device *dev) { struct ip6_tnl *nt = netdev_priv(dev); - dev->features |= GRE6_FEATURES | NETIF_F_LLTX; + dev->features |= GRE6_FEATURES; dev->hw_features |= GRE6_FEATURES; /* TCP offload with GRE SEQ is not supported, nor can we support 2 @@ -1485,6 +1485,8 @@ static void ip6gre_tnl_init_features(struct net_device *dev) dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_GSO_SOFTWARE; + + dev->lltx = true; } static int ip6gre_tunnel_init_common(struct net_device *dev) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 87dfb565a9f8..cbef0fcece71 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1849,7 +1849,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->type = ARPHRD_TUNNEL6; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3b2eed7fc765..58306522fb1e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1436,7 +1436,7 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->flags = IFF_NOARP; netif_keep_dst(dev); dev->addr_len = 4; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->features |= SIT_FEATURES; dev->hw_features |= SIT_FEATURES; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 15a862f33f76..d692b902e120 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -97,7 +97,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &l2tpeth_type); ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->netdev_ops = &l2tp_eth_netdev_ops; dev->needs_free_netdev = true; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 4b33133cbdff..3a369a31c5cc 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -102,19 +102,20 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_NO_QUEUE; + netdev->lltx = true; netdev->needs_free_netdev = true; netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; - netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | - NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | - NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL; + netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | + NETIF_F_GSO_ENCAP_ALL; netdev->vlan_features = netdev->features; netdev->hw_enc_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - netdev->hw_features = netdev->features & ~NETIF_F_LLTX; + netdev->hw_features = netdev->features; eth_hw_addr_random(netdev); } diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index e50e4bf993fa..98f1e2b67c76 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -769,7 +769,7 @@ static int xfrmi_dev_init(struct net_device *dev) if (err) return err; - dev->features |= NETIF_F_LLTX; + dev->lltx = true; dev->features |= XFRMI_FEATURES; dev->hw_features |= XFRMI_FEATURES; -- cgit v1.2.3 From 05c1280a2bcfca187fe7fa90bb240602cf54af0a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:38 +0200 Subject: netdev_features: convert NETIF_F_NETNS_LOCAL to dev->netns_local "Interface can't change network namespaces" is rather an attribute, not a feature, and it can't be changed via Ethtool. Make it a "cold" private flag instead of a netdev_feature and free one more bit. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- Documentation/networking/net_cachelines/net_device.rst | 1 + Documentation/networking/netdev-features.rst | 7 ------- Documentation/networking/switchdev.rst | 4 ++-- drivers/net/amt.c | 2 +- drivers/net/bonding/bond_main.c | 6 +++--- drivers/net/ethernet/adi/adin1110.c | 2 +- drivers/net/ethernet/marvell/prestera/prestera_main.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++-- drivers/net/ethernet/rocker/rocker_main.c | 3 ++- drivers/net/ethernet/ti/cpsw_new.c | 3 ++- drivers/net/loopback.c | 2 +- drivers/net/net_failover.c | 2 +- drivers/net/team/team_core.c | 6 +++--- drivers/net/vrf.c | 2 +- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ net/batman-adv/soft-interface.c | 3 ++- net/bridge/br_device.c | 5 +++-- net/core/dev.c | 4 ++-- net/ethtool/common.c | 1 - net/hsr/hsr_device.c | 8 ++++---- net/ieee802154/6lowpan/core.c | 2 +- net/ieee802154/core.c | 10 +++++----- net/ipv4/ip_tunnel.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6_gre.c | 3 +-- net/ipv6/ip6_tunnel.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/ipv6/sit.c | 2 +- net/openvswitch/vport-internal_dev.c | 2 +- net/wireless/core.c | 10 +++++----- tools/testing/selftests/net/forwarding/README | 2 +- 34 files changed, 61 insertions(+), 62 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index e55319277074..e649d8e9556f 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -166,6 +166,7 @@ unsigned:1 wol_enabled unsigned:1 threaded - - napi_poll(napi_enable,dev_set_threaded) unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down +unsigned_long:1 netns_local struct_list_head net_notifier_list struct_macsec_ops* macsec_ops struct_udp_tunnel_nic_info* udp_tunnel_nic_info diff --git a/Documentation/networking/netdev-features.rst b/Documentation/networking/netdev-features.rst index f29d982ebf5d..5014f7cc1398 100644 --- a/Documentation/networking/netdev-features.rst +++ b/Documentation/networking/netdev-features.rst @@ -139,13 +139,6 @@ chained skbs (skb->next/prev list). Features contained in NETIF_F_SOFT_FEATURES are features of networking stack. Driver should not change behaviour based on them. - * netns-local device - -NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between -network namespaces (e.g. loopback). - -Don't use it in drivers. - * VLAN challenged NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst index 758f1dae3fce..f355f0166f1b 100644 --- a/Documentation/networking/switchdev.rst +++ b/Documentation/networking/switchdev.rst @@ -137,10 +137,10 @@ would be sub-port 0 on port 1 on switch 1. Port Features ^^^^^^^^^^^^^ -NETIF_F_NETNS_LOCAL +dev->netns_local If the switchdev driver (and device) only supports offloading of the default -network namespace (netns), the driver should set this feature flag to prevent +network namespace (netns), the driver should set this private flag to prevent the port netdev from being moved out of the default netns. A netns-aware driver/device would not set this flag and be responsible for partitioning hardware to preserve netns containment. This means hardware cannot forward diff --git a/drivers/net/amt.c b/drivers/net/amt.c index 921bbfd72a38..0433a0f36d1b 100644 --- a/drivers/net/amt.c +++ b/drivers/net/amt.c @@ -3099,8 +3099,8 @@ static void amt_link_setup(struct net_device *dev) dev->addr_len = 0; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; + dev->netns_local = true; dev->features |= NETIF_F_GSO_SOFTWARE; - dev->features |= NETIF_F_NETNS_LOCAL; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->hw_features |= NETIF_F_FRAGLIST | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8ae887cdc231..f13d413ad26c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5930,6 +5930,9 @@ void bond_setup(struct net_device *bond_dev) /* don't acquire bond device's netif_tx_lock when transmitting */ bond_dev->lltx = true; + /* Don't allow bond devices to change network namespaces. */ + bond_dev->netns_local = true; + /* By default, we declare the bond to be fully * VLAN hardware accelerated capable. Special * care is taken in the various xmit functions @@ -5937,9 +5940,6 @@ void bond_setup(struct net_device *bond_dev) * capable */ - /* Don't allow bond devices to change network namespaces. */ - bond_dev->features |= NETIF_F_NETNS_LOCAL; - bond_dev->hw_features = BOND_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 0713f1e2c7f3..3431a7e62b0d 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -1599,7 +1599,7 @@ static int adin1110_probe_netdevs(struct adin1110_priv *priv) netdev->netdev_ops = &adin1110_netdev_ops; netdev->ethtool_ops = &adin1110_ethtool_ops; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->features |= NETIF_F_NETNS_LOCAL; + netdev->netns_local = true; port_priv->phydev = get_phy_device(priv->mii_bus, i + 1, false); if (IS_ERR(port_priv->phydev)) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 63ae01954dfc..22ca6ee9665e 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -633,7 +633,8 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) if (err) goto err_dl_port_register; - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + dev->features |= NETIF_F_HW_TC; + dev->netns_local = true; dev->netdev_ops = &prestera_netdev_ops; dev->ethtool_ops = &prestera_ethtool_ops; SET_NETDEV_DEV(dev, sw->dev->dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 16b67c457b60..47e7a80d221b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4414,9 +4414,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); - features |= NETIF_F_NETNS_LOCAL; + netdev->netns_local = true; } else { - features &= ~NETIF_F_NETNS_LOCAL; + netdev->netns_local = false; } mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index b885042eef14..92094bf60d59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -898,7 +898,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->hw_features |= NETIF_F_RXCSUM; netdev->features |= netdev->hw_features; - netdev->features |= NETIF_F_NETNS_LOCAL; + + netdev->netns_local = true; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 44d6e125bd6f..b9ffd7236aff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1676,10 +1676,11 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; + dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_TC; dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; dev->lltx = true; + dev->netns_local = true; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index e097ce3e69ea..84fa911c78db 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2575,7 +2575,8 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number) netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx); rocker_carrier_init(rocker_port); - dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG; + dev->features |= NETIF_F_SG; + dev->netns_local = true; /* MTU range: 68 - 9000 */ dev->min_mtu = ROCKER_PORT_MIN_MTU; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 2baa198ebfa0..557cc71b9dd2 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1407,7 +1407,8 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) cpsw->slaves[i].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_TC; + ndev->netns_local = true; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index bf857782be0f..1993b90b1a5f 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -172,6 +172,7 @@ static void gen_lo_setup(struct net_device *dev, dev->flags = IFF_LOOPBACK; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->lltx = true; + dev->netns_local = true; netif_keep_dst(dev); dev->hw_features = NETIF_F_GSO_SOFTWARE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST @@ -180,7 +181,6 @@ static void gen_lo_setup(struct net_device *dev, | NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA - | NETIF_F_NETNS_LOCAL | NETIF_F_VLAN_CHALLENGED | NETIF_F_LOOPBACK; dev->ethtool_ops = eth_ops; diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 06728385a35f..54c8b9d5b5fc 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -734,7 +734,7 @@ struct failover *net_failover_create(struct net_device *standby_dev) failover_dev->lltx = true; /* Don't allow failover devices to change network namespaces. */ - failover_dev->features |= NETIF_F_NETNS_LOCAL; + failover_dev->netns_local = true; failover_dev->hw_features = FAILOVER_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index 1d1bad3cedc2..18191d5a8bd4 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2191,10 +2191,10 @@ static void team_setup(struct net_device *dev) dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE; dev->lltx = true; - dev->features |= NETIF_F_GRO; - /* Don't allow team devices to change network namespaces. */ - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; + + dev->features |= NETIF_F_GRO; dev->hw_features = TEAM_VLAN_FEATURES | NETIF_F_HW_VLAN_CTAG_RX | diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b1d4ef538995..4d8ccaf9a2b4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1638,7 +1638,7 @@ static void vrf_setup(struct net_device *dev) dev->lltx = true; /* don't allow vrf devices to change network namespaces. */ - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; /* does not make sense for a VLAN to be added to a vrf device */ dev->features |= NETIF_F_VLAN_CHALLENGED; diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index a2e20b517584..d5a3836f4793 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -25,7 +25,7 @@ enum { NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ __UNUSED_NETIF_F_12, - NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ + __UNUSED_NETIF_F_13, NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -121,7 +121,6 @@ enum { #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) -#define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) #define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) @@ -190,8 +189,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_NETNS_LOCAL) +#define NETIF_F_NEVER_CHANGE NETIF_F_VLAN_CHALLENGED /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e22ca5e07490..a698e2402420 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1968,6 +1968,7 @@ enum netdev_reg_state { * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * @netns_local: interface can't change network namespaces * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2361,6 +2362,7 @@ struct net_device { /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; + unsigned long netns_local:1; struct list_head net_notifier_list; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e791a73ef901..2758aba47a2f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1020,9 +1020,10 @@ static void batadv_softif_init_early(struct net_device *dev) dev->netdev_ops = &batadv_netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = batadv_softif_free; - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; + dev->netns_local = true; /* can't call min_mtu, because the needed variables * have not been initialized yet diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index a6d25113dfb1..26b79feb385d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -488,9 +488,10 @@ void br_dev_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &br_type); dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE; dev->lltx = true; + dev->netns_local = true; - dev->features = COMMON_FEATURES | NETIF_F_NETNS_LOCAL | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->vlan_features = COMMON_FEATURES; diff --git a/net/core/dev.c b/net/core/dev.c index 56aed88ceadf..98bb5f890b88 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11487,7 +11487,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, /* Don't allow namespace local devices to be moved. */ err = -EINVAL; - if (dev->features & NETIF_F_NETNS_LOCAL) + if (dev->netns_local) goto out; /* Ensure the device has been registered */ @@ -11869,7 +11869,7 @@ static void __net_exit default_device_exit_net(struct net *net) char fb_name[IFNAMSIZ]; /* Ignore unmoveable devices (i.e. loopback) */ - if (dev->features & NETIF_F_NETNS_LOCAL) + if (dev->netns_local) continue; /* Leave virtual devices for the generic cleanup */ diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 447fc80e1b00..ca8e64162104 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -25,7 +25,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", - [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", [NETIF_F_GRO_BIT] = "rx-gro", [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", [NETIF_F_LRO_BIT] = "rx-lro", diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index d4c783076662..a06e790042e2 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -556,6 +556,10 @@ void hsr_dev_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; /* Prevent recursive tx locking */ dev->lltx = true; + /* Not sure about this. Taken from bridge code. netdevice.h says + * it means "Does not change network namespaces". + */ + dev->netns_local = true; dev->needs_free_netdev = true; @@ -569,10 +573,6 @@ void hsr_dev_setup(struct net_device *dev) * hsr_header_create() etc. */ dev->features |= NETIF_F_VLAN_CHALLENGED; - /* Not sure about this. Taken from bridge code. netdev_features.h says - * it means "Does not change network namespaces". - */ - dev->features |= NETIF_F_NETNS_LOCAL; } /* Return true if dev is a HSR master; return false otherwise. diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 77b4e92027c5..175efd860f7b 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -116,7 +116,7 @@ static void lowpan_setup(struct net_device *ldev) ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; ldev->needs_free_netdev = true; - ldev->features |= NETIF_F_NETNS_LOCAL; + ldev->netns_local = true; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index 60e8fff1347e..88adb04e4072 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -226,11 +226,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) { if (!wpan_dev->netdev) continue; - wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + wpan_dev->netdev->netns_local = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); if (err) break; - wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL; + wpan_dev->netdev->netns_local = true; } if (err) { @@ -242,11 +242,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev, list) { if (!wpan_dev->netdev) continue; - wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + wpan_dev->netdev->netns_local = false; err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d"); WARN_ON(err); - wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL; + wpan_dev->netdev->netns_local = true; } return err; @@ -291,7 +291,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb, switch (state) { /* TODO NETDEV_DEVTYPE */ case NETDEV_REGISTER: - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; wpan_dev->identifier = ++rdev->wpan_dev_id; list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list); rdev->devlist_generation++; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0cd2f3de100c..18964394d6bd 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1161,7 +1161,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, * Allowing to move it to another netns is clearly unsafe. */ if (!IS_ERR(itn->fb_tunnel_dev)) { - itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->netns_local = true; itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); itn->type = itn->fb_tunnel_dev->type; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b0eda745e3bc..f1a43199551b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -537,7 +537,7 @@ static void reg_vif_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; dev->needs_free_netdev = true; - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; } static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 08beab638bda..235808cfec70 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1621,8 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - + ign->fb_tunnel_dev->netns_local = true; ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cbef0fcece71..ec51ab5063e8 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2258,7 +2258,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; + ip6n->fb_tnl_dev->netns_local = true; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e3ee93c562c3..2ce4ae0d8dc3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -640,7 +640,7 @@ static void reg_vif_setup(struct net_device *dev) dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; dev->needs_free_netdev = true; - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; } static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 58306522fb1e..16b90a24c9ba 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1856,7 +1856,7 @@ static int __net_init sit_init_net(struct net *net) /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. */ - sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + sitn->fb_tunnel_dev->netns_local = true; err = register_netdev(sitn->fb_tunnel_dev); if (err) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 3a369a31c5cc..5858d65ea1a9 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -149,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) /* Restrict bridge port to current netns. */ if (vport->port_no == OVSP_LOCAL) - vport->dev->features |= NETIF_F_NETNS_LOCAL; + vport->dev->netns_local = true; rtnl_lock(); err = register_netdevice(vport->dev); diff --git a/net/wireless/core.c b/net/wireless/core.c index 4d5d351bd0b5..661adfc77644 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -165,11 +165,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; - wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + wdev->netdev->netns_local = false; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); if (err) break; - wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + wdev->netdev->netns_local = true; } if (err) { @@ -181,11 +181,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, list) { if (!wdev->netdev) continue; - wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + wdev->netdev->netns_local = false; err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); WARN_ON(err); - wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + wdev->netdev->netns_local = true; } return err; @@ -1473,7 +1473,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, SET_NETDEV_DEVTYPE(dev, &wiphy_type); wdev->netdev = dev; /* can only change netns with wiphy */ - dev->features |= NETIF_F_NETNS_LOCAL; + dev->netns_local = true; cfg80211_init_wdev(wdev); break; diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7fdb6a9ca543..a652429bfd53 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +(dev->netns_local) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and -- cgit v1.2.3 From 782dbbf589cd9082effaec522e3f1b4ce1594803 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 29 Aug 2024 14:33:39 +0200 Subject: netdev_features: convert NETIF_F_FCOE_MTU to dev->fcoe_mtu Ability to handle maximum FCoE frames of 2158 bytes can never be changed and thus more of an attribute, not a toggleable feature. Move it from netdev_features_t to "cold" priv flags (bitfield bool) and free yet another feature bit. Signed-off-by: Alexander Lobakin Signed-off-by: Paolo Abeni --- Documentation/networking/net_cachelines/net_device.rst | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c | 6 ++---- drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 ++++------- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 ++-- drivers/scsi/fcoe/fcoe.c | 4 ++-- include/linux/netdev_features.h | 6 ++---- include/linux/netdevice.h | 2 ++ net/8021q/vlan_dev.c | 1 + net/ethtool/common.c | 1 - 12 files changed, 20 insertions(+), 24 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index e649d8e9556f..56d4bbaf8a2c 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -167,6 +167,7 @@ unsigned:1 threaded - unsigned_long:1 see_all_hwtstamp_requests unsigned_long:1 change_proto_down unsigned_long:1 netns_local +unsigned_long:1 fcoe_mtu struct_list_head net_notifier_list struct_macsec_ops* macsec_ops struct_udp_tunnel_nic_info* udp_tunnel_nic_info diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c index 33b2c0c45509..f6f745f5c022 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_fcoe.c @@ -81,8 +81,7 @@ int cxgb_fcoe_enable(struct net_device *netdev) netdev->features |= NETIF_F_FCOE_CRC; netdev->vlan_features |= NETIF_F_FCOE_CRC; - netdev->features |= NETIF_F_FCOE_MTU; - netdev->vlan_features |= NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = true; netdev_features_change(netdev); @@ -112,8 +111,7 @@ int cxgb_fcoe_disable(struct net_device *netdev) netdev->features &= ~NETIF_F_FCOE_CRC; netdev->vlan_features &= ~NETIF_F_FCOE_CRC; - netdev->features &= ~NETIF_F_FCOE_MTU; - netdev->vlan_features &= ~NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = false; netdev_features_change(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index e85f7d2e8810..f2709b10c2e5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -317,7 +317,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN; #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) + if (adapter->netdev->fcoe_mtu) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 18d63c8c2ff4..955dced844a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -858,7 +858,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev) /* enable FCoE and notify stack */ adapter->flags |= IXGBE_FLAG_FCOE_ENABLED; - netdev->features |= NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = true; netdev_features_change(netdev); /* release existing queues and reallocate them */ @@ -898,7 +898,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev) /* disable FCoE and notify stack */ adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; - netdev->features &= ~NETIF_F_FCOE_MTU; + netdev->fcoe_mtu = false; netdev_features_change(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 0ee943db3dc9..16fa621ce0ff 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -981,7 +981,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state); #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) { + if (adapter->netdev->fcoe_mtu) { struct ixgbe_ring_feature *f; f = &adapter->ring_feature[RING_F_FCOE]; if ((rxr_idx >= f->offset) && diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8057cef61f39..8b8404d8c946 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5079,7 +5079,7 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) netif_set_tso_max_size(adapter->netdev, 32768); #ifdef IXGBE_FCOE - if (adapter->netdev->features & NETIF_F_FCOE_MTU) + if (adapter->netdev->fcoe_mtu) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif @@ -5136,8 +5136,7 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) #ifdef IXGBE_FCOE /* FCoE traffic class uses FCOE jumbo frames */ - if ((dev->features & NETIF_F_FCOE_MTU) && - (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + if (dev->fcoe_mtu && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE && (pb == ixgbe_fcoe_get_tc(adapter))) tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif @@ -5197,8 +5196,7 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) #ifdef IXGBE_FCOE /* FCoE traffic class uses FCOE jumbo frames */ - if ((dev->features & NETIF_F_FCOE_MTU) && - (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + if (dev->fcoe_mtu && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE && (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up))) tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif @@ -11096,8 +11094,7 @@ skip_sriov: NETIF_F_FCOE_CRC; netdev->vlan_features |= NETIF_F_FSO | - NETIF_F_FCOE_CRC | - NETIF_F_FCOE_MTU; + NETIF_F_FCOE_CRC; } #endif /* IXGBE_FCOE */ if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcfd0a075eee..e71715f5da22 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -495,7 +495,7 @@ static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf int err = 0; #ifdef CONFIG_FCOE - if (dev->features & NETIF_F_FCOE_MTU) + if (dev->fcoe_mtu) pf_max_frame = max_t(int, pf_max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -857,7 +857,7 @@ static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf) int pf_max_frame = dev->mtu + ETH_HLEN; #if IS_ENABLED(CONFIG_FCOE) - if (dev->features & NETIF_F_FCOE_MTU) + if (dev->fcoe_mtu) pf_max_frame = max_t(int, pf_max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif /* CONFIG_FCOE */ diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index f1429f270170..39aec710660c 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -722,7 +722,7 @@ static int fcoe_netdev_config(struct fc_lport *lport, struct net_device *netdev) * will return 0, so do this first. */ mfs = netdev->mtu; - if (netdev->features & NETIF_F_FCOE_MTU) { + if (netdev->fcoe_mtu) { mfs = FCOE_MTU; FCOE_NETDEV_DBG(netdev, "Supports FCOE_MTU of %d bytes\n", mfs); } @@ -1863,7 +1863,7 @@ static int fcoe_device_notification(struct notifier_block *notifier, case NETDEV_CHANGE: break; case NETDEV_CHANGEMTU: - if (netdev->features & NETIF_F_FCOE_MTU) + if (netdev->fcoe_mtu) break; mfs = netdev->mtu - (sizeof(struct fcoe_hdr) + sizeof(struct fcoe_crc_eof)); diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index d5a3836f4793..37af2c6e7caf 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -58,7 +58,7 @@ enum { NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ + __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -105,7 +105,6 @@ enum { #define __NETIF_F(name) __NETIF_F_BIT(NETIF_F_##name##_BIT) #define NETIF_F_FCOE_CRC __NETIF_F(FCOE_CRC) -#define NETIF_F_FCOE_MTU __NETIF_F(FCOE_MTU) #define NETIF_F_FRAGLIST __NETIF_F(FRAGLIST) #define NETIF_F_FSO __NETIF_F(FSO) #define NETIF_F_GRO __NETIF_F(GRO) @@ -210,8 +209,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ - NETIF_F_FSO) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FSO) /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a698e2402420..ca5f0dda733b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,6 +1969,7 @@ enum netdev_reg_state { * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN * @netns_local: interface can't change network namespaces + * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved @@ -2363,6 +2364,7 @@ struct net_device { unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; unsigned long netns_local:1; + unsigned long fcoe_mtu:1; struct list_head net_notifier_list; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3ca485537d77..09b46b057ab2 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -571,6 +571,7 @@ static int vlan_dev_init(struct net_device *dev) dev->features |= dev->hw_features; dev->lltx = true; + dev->fcoe_mtu = true; netif_inherit_tso_max(dev, real_dev); if (dev->features & NETIF_F_VLAN_FEATURES) netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index ca8e64162104..00f93c58b319 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -50,7 +50,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", - [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", [NETIF_F_RXCSUM_BIT] = "rx-checksum", -- cgit v1.2.3 From 08062af0a52107a243f7608fd972edb54ca5b7f8 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 4 Sep 2024 15:34:30 +0000 Subject: net: napi: Prevent overflow of napi_defer_hard_irqs In commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature") napi_defer_irqs was added to net_device and napi_defer_irqs_count was added to napi_struct, both as type int. This value never goes below zero, so there is not reason for it to be a signed int. Change the type for both from int to u32, and add an overflow check to sysfs to limit the value to S32_MAX. The limit of S32_MAX was chosen because the practical limit before this patch was S32_MAX (anything larger was an overflow) and thus there are no behavioral changes introduced. If the extra bit is needed in the future, the limit can be raised. Before this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' $ cat /sys/class/net/eth4/napi_defer_hard_irqs -2147483647 After this patch: $ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs' bash: line 0: echo: write error: Numerical result out of range Similarly, /sys/class/net/XXXXX/tx_queue_len is defined as unsigned: include/linux/netdevice.h: unsigned int tx_queue_len; And has an overflow check: dev_change_tx_queue_len(..., unsigned long new_len): if (new_len != (unsigned int)new_len) return -ERANGE; Suggested-by: Jakub Kicinski Signed-off-by: Joe Damato Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240904153431.307932-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 4 ++-- net/core/net-sysfs.c | 6 +++++- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 56d4bbaf8a2c..22b07c814f4a 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -99,7 +99,7 @@ unsigned_int num_rx_queues unsigned_int real_num_rx_queues - read_mostly get_rps_cpu struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro() unsigned_long gro_flush_timeout - read_mostly napi_complete_done -int napi_defer_hard_irqs - read_mostly napi_complete_done +u32 napi_defer_hard_irqs - read_mostly napi_complete_done unsigned_int gro_max_size - read_mostly skb_gro_receive unsigned_int gro_ipv4_max_size - read_mostly skb_gro_receive rx_handler_func_t* rx_handler read_mostly - __netif_receive_skb_core diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5f0dda733b..b47c00657bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -356,7 +356,7 @@ struct napi_struct { unsigned long state; int weight; - int defer_hard_irqs_count; + u32 defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -2075,7 +2075,7 @@ struct net_device { unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; + u32 napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 01a9fb54ef42..146a271a74e0 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -32,6 +32,7 @@ #ifdef CONFIG_SYSFS static const char fmt_hex[] = "%#x\n"; static const char fmt_dec[] = "%d\n"; +static const char fmt_uint[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -425,6 +426,9 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val) { + if (val > S32_MAX) + return -ERANGE; + WRITE_ONCE(dev->napi_defer_hard_irqs, val); return 0; } @@ -438,7 +442,7 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev, return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs); } -NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec); +NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint); static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) -- cgit v1.2.3 From 3dc95a3edd0a86b4a59670b3fafcc64c7d83e2e7 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 4 Sep 2024 17:47:45 +0200 Subject: netdevice: add netdev_tx_reset_subqueue() shorthand Add a shorthand similar to other net*_subqueue() helpers for resetting the queue by its index w/o obtaining &netdev_tx_queue beforehand manually. Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/linux/netdevice.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..44d1dbb54ffe 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3568,6 +3568,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) #endif } +/** + * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue + * @dev: network device + * @qid: stack index of the queue to reset + */ +static inline void netdev_tx_reset_subqueue(const struct net_device *dev, + u32 qid) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); +} + /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device @@ -3577,7 +3588,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) */ static inline void netdev_reset_queue(struct net_device *dev_queue) { - netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); + netdev_tx_reset_subqueue(dev_queue, 0); } /** -- cgit v1.2.3 From 17245a195df4c86b1fd38718d8cdc532c040c08e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 6 Sep 2024 09:10:59 -0700 Subject: net: remove dev_pick_tx_cpu_id() dev_pick_tx_cpu_id() has been introduced with two users by commit a4ea8a3dacc3 ("net: Add generic ndo_select_queue functions"). The use in AF_PACKET has been removed in 2019 by commit b71b5837f871 ("packet: rework packet_pick_tx_queue() to use common code selection") The other user was a Netlogic XLP driver, removed in 2021 by commit 47ac6f567c28 ("staging: Remove Netlogic XLP network driver"). It's relatively unlikely that any modern driver will need an .ndo_select_queue implementation which picks purely based on CPU ID and skips XPS, delete dev_pick_tx_cpu_id() Found by code inspection. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240906161059.715546-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 -- net/core/dev.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..2e40a137dc12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3085,8 +3085,6 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); diff --git a/net/core/dev.c b/net/core/dev.c index 22c3f14d9287..8f4dead64284 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4247,13 +4247,6 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(dev_pick_tx_zero); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; -} -EXPORT_SYMBOL(dev_pick_tx_cpu_id); - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { -- cgit v1.2.3 From 170aafe35cb98e0f3fbacb446ea86389fbce22ea Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:47 +0000 Subject: netdev: support binding dma-buf to netdevice Add a netdev_dmabuf_binding struct which represents the dma-buf-to-netdevice binding. The netlink API will bind the dma-buf to rx queues on the netdevice. On the binding, the dma_buf_attach & dma_buf_map_attachment will occur. The entries in the sg_table from mapping will be inserted into a genpool to make it ready for allocation. The chunks in the genpool are owned by a dmabuf_chunk_owner struct which holds the dma-buf offset of the base of the chunk and the dma_addr of the chunk. Both are needed to use allocations that come from this chunk. We create a new type that represents an allocation from the genpool: net_iov. We setup the net_iov allocation size in the genpool to PAGE_SIZE for simplicity: to match the PAGE_SIZE normally allocated by the page pool and given to the drivers. The user can unbind the dmabuf from the netdevice by closing the netlink socket that established the binding. We do this so that the binding is automatically unbound even if the userspace process crashes. The binding and unbinding leaves an indicator in struct netdev_rx_queue that the given queue is bound, and the binding is actuated by resetting the rx queue using the queue API. The netdev_dmabuf_binding struct is refcounted, and releases its resources only when all the refs are released. Signed-off-by: Willem de Bruijn Signed-off-by: Kaiyuan Zhang Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov # excluding netlink Acked-by: Daniel Vetter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 4 + include/linux/netdevice.h | 2 + include/net/netdev_rx_queue.h | 2 + include/net/netmem.h | 8 + include/net/page_pool/types.h | 6 + net/Kconfig | 5 + net/core/Makefile | 1 + net/core/dev.c | 26 +++ net/core/devmem.c | 274 ++++++++++++++++++++++++++++++++ net/core/devmem.h | 124 +++++++++++++++ net/core/netdev-genl-gen.c | 4 + net/core/netdev-genl-gen.h | 4 + net/core/netdev-genl.c | 128 ++++++++++++++- net/ethtool/common.c | 8 + net/xdp/xsk_buff_pool.c | 5 + 15 files changed, 596 insertions(+), 5 deletions(-) create mode 100644 net/core/devmem.c create mode 100644 net/core/devmem.h (limited to 'include/linux/netdevice.h') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 4930e8142aa6..0c747530c275 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -667,6 +667,10 @@ operations: attributes: - id +kernel-family: + headers: [ "linux/list.h"] + sock-priv: struct list_head + mcast-groups: list: - diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2465bdb6037f..e87b5e488325 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3953,6 +3953,8 @@ u8 dev_xdp_prog_count(struct net_device *dev); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); +u32 dev_get_min_mp_channel_count(const struct net_device *dev); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index e78ca52d67fb..ac34f5fb4f71 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -6,6 +6,7 @@ #include #include #include +#include /* This structure contains an instance of an RX queue. */ struct netdev_rx_queue { @@ -25,6 +26,7 @@ struct netdev_rx_queue { * Readers and writers must hold RTNL */ struct napi_struct *napi; + struct pp_memory_provider_params mp_params; } ____cacheline_aligned_in_smp; /* diff --git a/include/net/netmem.h b/include/net/netmem.h index 46cc9b89ac79..c23e224dd6a0 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -8,6 +8,14 @@ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H +/* net_iov */ + +struct net_iov { + struct dmabuf_genpool_chunk_owner *owner; +}; + +/* netmem */ + /** * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 50569fed7868..4afd6dd56351 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -139,6 +139,10 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct pp_memory_provider_params { + void *mp_priv; +}; + struct page_pool { struct page_pool_params_fast p; @@ -197,6 +201,8 @@ struct page_pool { */ struct ptr_ring ring; + void *mp_priv; + #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ struct page_pool_recycle_stats __percpu *recycle_stats; diff --git a/net/Kconfig b/net/Kconfig index d27d0deac0bf..7574b066d7cd 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -66,6 +66,11 @@ config SKB_DECRYPTED config SKB_EXTENSIONS bool +config NET_DEVMEM + def_bool y + depends on DMA_SHARED_BUFFER + depends on GENERIC_ALLOCATOR + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/Makefile b/net/core/Makefile index f82232b358a2..c3ebbaf9c81e 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o +obj-$(CONFIG_NET_DEVMEM) += devmem.o diff --git a/net/core/dev.c b/net/core/dev.c index 8f4dead64284..7818d7f6295e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -161,6 +161,7 @@ #include #include "dev.h" +#include "devmem.h" #include "net-sysfs.h" static DEFINE_SPINLOCK(ptype_lock); @@ -9367,6 +9368,11 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; + if (dev_get_min_mp_channel_count(dev)) { + NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider"); + return -EBUSY; + } + return dev->netdev_ops->ndo_bpf(dev, bpf); } EXPORT_SYMBOL_GPL(dev_xdp_propagate); @@ -9399,6 +9405,11 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + if (dev_get_min_mp_channel_count(dev)) { + NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider"); + return -EBUSY; + } + memset(&xdp, 0, sizeof(xdp)); xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG; xdp.extack = extack; @@ -9823,6 +9834,20 @@ err_out: return err; } +u32 dev_get_min_mp_channel_count(const struct net_device *dev) +{ + int i; + + ASSERT_RTNL(); + + for (i = dev->real_num_rx_queues - 1; i >= 0; i--) + if (dev->_rx[i].mp_params.mp_priv) + /* The channel count is the idx plus 1. */ + return i + 1; + + return 0; +} + /** * dev_index_reserve() - allocate an ifindex in a namespace * @net: the applicable net namespace @@ -11359,6 +11384,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); dev_xdp_uninstall(dev); bpf_dev_bound_netdev_unregister(dev); + dev_dmabuf_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c new file mode 100644 index 000000000000..8dd7beb080d2 --- /dev/null +++ b/net/core/devmem.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Devmem TCP + * + * Authors: Mina Almasry + * Willem de Bruijn + * Kaiyuan Zhang +#include +#include +#include +#include +#include +#include +#include +#include + +#include "devmem.h" + +/* Device memory support */ + +/* Protected by rtnl_lock() */ +static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); + +static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, + struct gen_pool_chunk *chunk, + void *not_used) +{ + struct dmabuf_genpool_chunk_owner *owner = chunk->owner; + + kvfree(owner->niovs); + kfree(owner); +} + +void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) +{ + size_t size, avail; + + gen_pool_for_each_chunk(binding->chunk_pool, + net_devmem_dmabuf_free_chunk_owner, NULL); + + size = gen_pool_size(binding->chunk_pool); + avail = gen_pool_avail(binding->chunk_pool); + + if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu", + size, avail)) + gen_pool_destroy(binding->chunk_pool); + + dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, + DMA_FROM_DEVICE); + dma_buf_detach(binding->dmabuf, binding->attachment); + dma_buf_put(binding->dmabuf); + xa_destroy(&binding->bound_rxqs); + kfree(binding); +} + +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) +{ + struct netdev_rx_queue *rxq; + unsigned long xa_idx; + unsigned int rxq_idx; + + if (binding->list.next) + list_del(&binding->list); + + xa_for_each(&binding->bound_rxqs, xa_idx, rxq) { + WARN_ON(rxq->mp_params.mp_priv != binding); + + rxq->mp_params.mp_priv = NULL; + + rxq_idx = get_netdev_rx_queue_index(rxq); + + WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx)); + } + + xa_erase(&net_devmem_dmabuf_bindings, binding->id); + + net_devmem_dmabuf_binding_put(binding); +} + +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, + struct net_devmem_dmabuf_binding *binding, + struct netlink_ext_ack *extack) +{ + struct netdev_rx_queue *rxq; + u32 xa_idx; + int err; + + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + + rxq = __netif_get_rx_queue(dev, rxq_idx); + if (rxq->mp_params.mp_priv) { + NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); + return -EEXIST; + } + +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) { + NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); + return -EBUSY; + } +#endif + + err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b, + GFP_KERNEL); + if (err) + return err; + + rxq->mp_params.mp_priv = binding; + + err = netdev_rx_queue_restart(dev, rxq_idx); + if (err) + goto err_xa_erase; + + return 0; + +err_xa_erase: + rxq->mp_params.mp_priv = NULL; + xa_erase(&binding->bound_rxqs, xa_idx); + + return err; +} + +struct net_devmem_dmabuf_binding * +net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, + struct netlink_ext_ack *extack) +{ + struct net_devmem_dmabuf_binding *binding; + static u32 id_alloc_next; + struct scatterlist *sg; + struct dma_buf *dmabuf; + unsigned int sg_idx, i; + unsigned long virtual; + int err; + + dmabuf = dma_buf_get(dmabuf_fd); + if (IS_ERR(dmabuf)) + return ERR_CAST(dmabuf); + + binding = kzalloc_node(sizeof(*binding), GFP_KERNEL, + dev_to_node(&dev->dev)); + if (!binding) { + err = -ENOMEM; + goto err_put_dmabuf; + } + + binding->dev = dev; + + err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id, + binding, xa_limit_32b, &id_alloc_next, + GFP_KERNEL); + if (err < 0) + goto err_free_binding; + + xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC); + + refcount_set(&binding->ref, 1); + + binding->dmabuf = dmabuf; + + binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent); + if (IS_ERR(binding->attachment)) { + err = PTR_ERR(binding->attachment); + NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device"); + goto err_free_id; + } + + binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment, + DMA_FROM_DEVICE); + if (IS_ERR(binding->sgt)) { + err = PTR_ERR(binding->sgt); + NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment"); + goto err_detach; + } + + /* For simplicity we expect to make PAGE_SIZE allocations, but the + * binding can be much more flexible than that. We may be able to + * allocate MTU sized chunks here. Leave that for future work... + */ + binding->chunk_pool = + gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev)); + if (!binding->chunk_pool) { + err = -ENOMEM; + goto err_unmap; + } + + virtual = 0; + for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) { + dma_addr_t dma_addr = sg_dma_address(sg); + struct dmabuf_genpool_chunk_owner *owner; + size_t len = sg_dma_len(sg); + struct net_iov *niov; + + owner = kzalloc_node(sizeof(*owner), GFP_KERNEL, + dev_to_node(&dev->dev)); + if (!owner) { + err = -ENOMEM; + goto err_free_chunks; + } + + owner->base_virtual = virtual; + owner->base_dma_addr = dma_addr; + owner->num_niovs = len / PAGE_SIZE; + owner->binding = binding; + + err = gen_pool_add_owner(binding->chunk_pool, dma_addr, + dma_addr, len, dev_to_node(&dev->dev), + owner); + if (err) { + kfree(owner); + err = -EINVAL; + goto err_free_chunks; + } + + owner->niovs = kvmalloc_array(owner->num_niovs, + sizeof(*owner->niovs), + GFP_KERNEL); + if (!owner->niovs) { + err = -ENOMEM; + goto err_free_chunks; + } + + for (i = 0; i < owner->num_niovs; i++) { + niov = &owner->niovs[i]; + niov->owner = owner; + } + + virtual += len; + } + + return binding; + +err_free_chunks: + gen_pool_for_each_chunk(binding->chunk_pool, + net_devmem_dmabuf_free_chunk_owner, NULL); + gen_pool_destroy(binding->chunk_pool); +err_unmap: + dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt, + DMA_FROM_DEVICE); +err_detach: + dma_buf_detach(dmabuf, binding->attachment); +err_free_id: + xa_erase(&net_devmem_dmabuf_bindings, binding->id); +err_free_binding: + kfree(binding); +err_put_dmabuf: + dma_buf_put(dmabuf); + return ERR_PTR(err); +} + +void dev_dmabuf_uninstall(struct net_device *dev) +{ + struct net_devmem_dmabuf_binding *binding; + struct netdev_rx_queue *rxq; + unsigned long xa_idx; + unsigned int i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + binding = dev->_rx[i].mp_params.mp_priv; + if (!binding) + continue; + + xa_for_each(&binding->bound_rxqs, xa_idx, rxq) + if (rxq == &dev->_rx[i]) { + xa_erase(&binding->bound_rxqs, xa_idx); + break; + } + } +} diff --git a/net/core/devmem.h b/net/core/devmem.h new file mode 100644 index 000000000000..c50f91d858dd --- /dev/null +++ b/net/core/devmem.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Device memory TCP support + * + * Authors: Mina Almasry + * Willem de Bruijn + * Kaiyuan Zhang + * + */ +#ifndef _NET_DEVMEM_H +#define _NET_DEVMEM_H + +struct netlink_ext_ack; + +struct net_devmem_dmabuf_binding { + struct dma_buf *dmabuf; + struct dma_buf_attachment *attachment; + struct sg_table *sgt; + struct net_device *dev; + struct gen_pool *chunk_pool; + + /* The user holds a ref (via the netlink API) for as long as they want + * the binding to remain alive. Each page pool using this binding holds + * a ref to keep the binding alive. Each allocated net_iov holds a + * ref. + * + * The binding undos itself and unmaps the underlying dmabuf once all + * those refs are dropped and the binding is no longer desired or in + * use. + */ + refcount_t ref; + + /* The list of bindings currently active. Used for netlink to notify us + * of the user dropping the bind. + */ + struct list_head list; + + /* rxq's this binding is active on. */ + struct xarray bound_rxqs; + + /* ID of this binding. Globally unique to all bindings currently + * active. + */ + u32 id; +}; + +#if defined(CONFIG_NET_DEVMEM) +/* Owner of the dma-buf chunks inserted into the gen pool. Each scatterlist + * entry from the dmabuf is inserted into the genpool as a chunk, and needs + * this owner struct to keep track of some metadata necessary to create + * allocations from this chunk. + */ +struct dmabuf_genpool_chunk_owner { + /* Offset into the dma-buf where this chunk starts. */ + unsigned long base_virtual; + + /* dma_addr of the start of the chunk. */ + dma_addr_t base_dma_addr; + + /* Array of net_iovs for this chunk. */ + struct net_iov *niovs; + size_t num_niovs; + + struct net_devmem_dmabuf_binding *binding; +}; + +void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); +struct net_devmem_dmabuf_binding * +net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, + struct netlink_ext_ack *extack); +void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); +int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, + struct net_devmem_dmabuf_binding *binding, + struct netlink_ext_ack *extack); +void dev_dmabuf_uninstall(struct net_device *dev); + +static inline void +net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) +{ + refcount_inc(&binding->ref); +} + +static inline void +net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding) +{ + if (!refcount_dec_and_test(&binding->ref)) + return; + + __net_devmem_dmabuf_binding_free(binding); +} + +#else +static inline void +__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding) +{ +} + +static inline struct net_devmem_dmabuf_binding * +net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, + struct netlink_ext_ack *extack) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) +{ +} + +static inline int +net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, + struct net_devmem_dmabuf_binding *binding, + struct netlink_ext_ack *extack) + +{ + return -EOPNOTSUPP; +} + +static inline void dev_dmabuf_uninstall(struct net_device *dev) +{ +} +#endif + +#endif /* _NET_DEVMEM_H */ diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 6b7fe6035067..b28424ae06d5 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -9,6 +9,7 @@ #include "netdev-genl-gen.h" #include +#include /* Integer value ranges */ static const struct netlink_range_validation netdev_a_page_pool_id_range = { @@ -187,4 +188,7 @@ struct genl_family netdev_nl_family __ro_after_init = { .n_split_ops = ARRAY_SIZE(netdev_nl_ops), .mcgrps = netdev_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps), + .sock_priv_size = sizeof(struct list_head), + .sock_priv_init = (void *)netdev_nl_sock_priv_init, + .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy, }; diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 67c34005750c..8cda334fd042 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -10,6 +10,7 @@ #include #include +#include /* Common nested types */ extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; @@ -40,4 +41,7 @@ enum { extern struct genl_family netdev_nl_family; +void netdev_nl_sock_priv_init(struct list_head *priv); +void netdev_nl_sock_priv_destroy(struct list_head *priv); + #endif /* _LINUX_NETDEV_GEN_H */ diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 699c34b9b03c..9153a8ab0cf8 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -3,16 +3,17 @@ #include #include #include +#include #include +#include +#include #include #include #include -#include -#include -#include -#include "netdev-genl-gen.h" #include "dev.h" +#include "devmem.h" +#include "netdev-genl-gen.h" struct netdev_nl_dump_ctx { unsigned long ifindex; @@ -723,10 +724,127 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, return err; } -/* Stub */ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) { + struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; + struct net_devmem_dmabuf_binding *binding; + struct list_head *sock_binding_list; + u32 ifindex, dmabuf_fd, rxq_idx; + struct net_device *netdev; + struct sk_buff *rsp; + struct nlattr *attr; + int rem, err = 0; + void *hdr; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES)) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]); + dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]); + + sock_binding_list = genl_sk_priv_get(&netdev_nl_family, + NETLINK_CB(skb).sk); + if (IS_ERR(sock_binding_list)) + return PTR_ERR(sock_binding_list); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_genlmsg_free; + } + + rtnl_lock(); + + netdev = __dev_get_by_index(genl_info_net(info), ifindex); + if (!netdev || !netif_device_present(netdev)) { + err = -ENODEV; + goto err_unlock; + } + + if (dev_xdp_prog_count(netdev)) { + NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached"); + err = -EEXIST; + goto err_unlock; + } + + binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack); + if (IS_ERR(binding)) { + err = PTR_ERR(binding); + goto err_unlock; + } + + nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES, + genlmsg_data(info->genlhdr), + genlmsg_len(info->genlhdr), rem) { + err = nla_parse_nested( + tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr, + netdev_queue_id_nl_policy, info->extack); + if (err < 0) + goto err_unbind; + + if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) || + NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) { + err = -EINVAL; + goto err_unbind; + } + + if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]); + err = -EINVAL; + goto err_unbind; + } + + rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]); + + err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding, + info->extack); + if (err) + goto err_unbind; + } + + list_add(&binding->list, sock_binding_list); + + nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id); + genlmsg_end(rsp, hdr); + + err = genlmsg_reply(rsp, info); + if (err) + goto err_unbind; + + rtnl_unlock(); + return 0; + +err_unbind: + net_devmem_unbind_dmabuf(binding); +err_unlock: + rtnl_unlock(); +err_genlmsg_free: + nlmsg_free(rsp); + return err; +} + +void netdev_nl_sock_priv_init(struct list_head *priv) +{ + INIT_LIST_HEAD(priv); +} + +void netdev_nl_sock_priv_destroy(struct list_head *priv) +{ + struct net_devmem_dmabuf_binding *binding; + struct net_devmem_dmabuf_binding *temp; + + list_for_each_entry_safe(binding, temp, priv, list) { + rtnl_lock(); + net_devmem_unbind_dmabuf(binding); + rtnl_unlock(); + } } static int netdev_genl_netdevice_event(struct notifier_block *nb, diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6c245e59bbc1..dd345efa114b 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -655,6 +655,7 @@ int ethtool_check_max_channel(struct net_device *dev, { u64 max_rxnfc_in_use; u32 max_rxfh_in_use; + int max_mp_in_use; /* ensure the new Rx count fits within the configured Rx flow * indirection table/rxnfc settings @@ -673,6 +674,13 @@ int ethtool_check_max_channel(struct net_device *dev, return -EINVAL; } + max_mp_in_use = dev_get_min_mp_channel_count(dev); + if (channels.combined_count + channels.rx_count <= max_mp_in_use) { + if (info) + GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing memory provider setting (%d)", max_mp_in_use); + return -EINVAL; + } + return 0; } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index c0e0204b9630..6b2756f95629 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -211,6 +211,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool, goto err_unreg_pool; } + if (dev_get_min_mp_channel_count(netdev)) { + err = -EBUSY; + goto err_unreg_pool; + } + bpf.command = XDP_SETUP_XSK_POOL; bpf.xsk.pool = pool; bpf.xsk.queue_id = queue_id; -- cgit v1.2.3