From 3830c5719af66fac9849cf5fb04b03d4e4bb46ff Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:01:59 +0100 Subject: net: devlink: convert devlink port type-specific pointers to union Instead of storing type_dev as a void pointer, convert it to union and use it to store either struct net_device or struct ib_device pointer. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index ba6b8b094943..6c55aabaedf1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -121,12 +121,19 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; unsigned int index; - spinlock_t type_lock; /* Protects type and type_dev - * pointer consistency. + spinlock_t type_lock; /* Protects type and type_eth/ib + * structures consistency. */ enum devlink_port_type type; enum devlink_port_type desired_type; - void *type_dev; + union { + struct { + struct net_device *netdev; + } type_eth; + struct { + struct ib_device *ibdev; + } type_ib; + }; struct devlink_port_attrs attrs; u8 attrs_set:1, switch_port:1, -- cgit v1.2.3 From c80965784dbf2fd624be654c1e73c24beada7441 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:05 +0100 Subject: net: devlink: remove netdev arg from devlink_port_type_eth_set() Since devlink_port_type_eth_set() should no longer be called by any driver with netdev pointer as it should rather use SET_NETDEV_DEVLINK_PORT, remove the netdev arg. Add a warn to type_clear() Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- include/net/devlink.h | 3 +-- net/core/devlink.c | 23 ++++++++++++++--------- 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/net/devlink.h') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d3fc86cd3c1d..3ae246391549 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3043,7 +3043,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) */ if (!IS_ENABLED(CONFIG_MLX4_EN) && dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - devlink_port_type_eth_set(&info->devlink_port, NULL); + devlink_port_type_eth_set(&info->devlink_port); else if (!IS_ENABLED(CONFIG_MLX4_INFINIBAND) && dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) devlink_port_type_ib_set(&info->devlink_port, NULL); diff --git a/include/net/devlink.h b/include/net/devlink.h index 6c55aabaedf1..b1582b32183a 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1582,8 +1582,7 @@ int devlink_port_register(struct devlink *devlink, unsigned int port_index); void devl_port_unregister(struct devlink_port *devlink_port); void devlink_port_unregister(struct devlink_port *devlink_port); -void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev); +void devlink_port_type_eth_set(struct devlink_port *devlink_port); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); diff --git a/net/core/devlink.c b/net/core/devlink.c index 6f06c05c7b1a..70a374c828ae 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10097,17 +10097,15 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, * devlink_port_type_eth_set - Set port type to Ethernet * * @devlink_port: devlink port - * @netdev: related netdevice + * + * If driver is calling this, most likely it is doing something wrong. */ -void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev) +void devlink_port_type_eth_set(struct devlink_port *devlink_port) { - if (!netdev) - dev_warn(devlink_port->devlink->dev, - "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", - devlink_port->index); - - __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, netdev, + dev_warn(devlink_port->devlink->dev, + "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL, false); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -10130,9 +10128,16 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); * devlink_port_type_clear - Clear port type * * @devlink_port: devlink port + * + * If driver is calling this for clearing Ethernet type, most likely + * it is doing something wrong. */ void devlink_port_type_clear(struct devlink_port *devlink_port) { + if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) + dev_warn(devlink_port->devlink->dev, + "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL, false); } -- cgit v1.2.3 From 31265c1e29eb28f17df50d04ee421b5b6369fefd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:07 +0100 Subject: net: devlink: store copy netdevice ifindex and ifname to allow port_fill() without RTNL held To avoid a need to take RTNL mutex in port_fill() function, benefit from the introduce infrastructure that tracks netdevice notifier events. Store the ifindex and ifname upon register and change name events. Remove the rtnl_held bool propagated down to port_fill() function as it is no longer needed. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 2 ++ net/core/devlink.c | 68 ++++++++++++++++++++------------------------------- 2 files changed, 29 insertions(+), 41 deletions(-) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index b1582b32183a..7befad57afd4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -129,6 +129,8 @@ struct devlink_port { union { struct { struct net_device *netdev; + int ifindex; + char ifname[IFNAMSIZ]; } type_eth; struct { struct ib_device *ibdev; diff --git a/net/core/devlink.c b/net/core/devlink.c index d948bb2fdd5f..38de3a1dff36 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1279,8 +1279,7 @@ out: static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink_port *devlink_port, enum devlink_command cmd, u32 portid, u32 seq, - int flags, struct netlink_ext_ack *extack, - bool rtnl_held) + int flags, struct netlink_ext_ack *extack) { struct devlink *devlink = devlink_port->devlink; void *hdr; @@ -1294,9 +1293,6 @@ static int devlink_nl_port_fill(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; - /* Hold rtnl lock while accessing port's netdev attributes. */ - if (!rtnl_held) - rtnl_lock(); spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; @@ -1305,13 +1301,11 @@ static int devlink_nl_port_fill(struct sk_buff *msg, devlink_port->desired_type)) goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { - struct net_device *netdev = devlink_port->type_eth.netdev; - - if (netdev && + if (devlink_port->type_eth.netdev && (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, - netdev->ifindex) || + devlink_port->type_eth.ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, - netdev->name))) + devlink_port->type_eth.ifname))) goto nla_put_failure_type_locked; } if (devlink_port->type == DEVLINK_PORT_TYPE_IB) { @@ -1323,8 +1317,6 @@ static int devlink_nl_port_fill(struct sk_buff *msg, goto nla_put_failure_type_locked; } spin_unlock_bh(&devlink_port->type_lock); - if (!rtnl_held) - rtnl_unlock(); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) @@ -1339,15 +1331,13 @@ static int devlink_nl_port_fill(struct sk_buff *msg, nla_put_failure_type_locked: spin_unlock_bh(&devlink_port->type_lock); - if (!rtnl_held) - rtnl_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } -static void __devlink_port_notify(struct devlink_port *devlink_port, - enum devlink_command cmd, bool rtnl_held) +static void devlink_port_notify(struct devlink_port *devlink_port, + enum devlink_command cmd) { struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; @@ -1362,8 +1352,7 @@ static void __devlink_port_notify(struct devlink_port *devlink_port, if (!msg) return; - err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL, - rtnl_held); + err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; @@ -1373,12 +1362,6 @@ static void __devlink_port_notify(struct devlink_port *devlink_port, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } -static void devlink_port_notify(struct devlink_port *devlink_port, - enum devlink_command cmd) -{ - __devlink_port_notify(devlink_port, cmd, false); -} - static void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { @@ -1542,7 +1525,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW, info->snd_portid, info->snd_seq, 0, - info->extack, false); + info->extack); if (err) { nlmsg_free(msg); return err; @@ -1572,8 +1555,7 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->extack, - false); + NLM_F_MULTI, cb->extack); if (err) { devl_unlock(devlink); devlink_put(devlink); @@ -1785,8 +1767,7 @@ static int devlink_port_new_notify(struct devlink *devlink, } err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, - info->snd_portid, info->snd_seq, 0, NULL, - false); + info->snd_portid, info->snd_seq, 0, NULL); if (err) goto out; @@ -10062,7 +10043,7 @@ static void devlink_port_type_netdev_checks(struct devlink_port *devlink_port, static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, - void *type_dev, bool rtnl_held) + void *type_dev) { struct net_device *netdev = type_dev; @@ -10081,6 +10062,13 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, switch (type) { case DEVLINK_PORT_TYPE_ETH: devlink_port->type_eth.netdev = netdev; + if (netdev) { + ASSERT_RTNL(); + devlink_port->type_eth.ifindex = netdev->ifindex; + BUILD_BUG_ON(sizeof(devlink_port->type_eth.ifname) != + sizeof(netdev->name)); + strcpy(devlink_port->type_eth.ifname, netdev->name); + } break; case DEVLINK_PORT_TYPE_IB: devlink_port->type_ib.ibdev = type_dev; @@ -10089,7 +10077,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, break; } spin_unlock_bh(&devlink_port->type_lock); - __devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW, rtnl_held); + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } /** @@ -10104,8 +10092,7 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port) dev_warn(devlink_port->devlink->dev, "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", devlink_port->index); - __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL, - false); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -10118,8 +10105,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev) { - __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev, - false); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_IB, ibdev); } EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); @@ -10137,8 +10123,7 @@ void devlink_port_type_clear(struct devlink_port *devlink_port) dev_warn(devlink_port->devlink->dev, "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", devlink_port->index); - __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL, - false); + __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_clear); @@ -10161,16 +10146,17 @@ static int devlink_netdevice_event(struct notifier_block *nb, * netdevice register */ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, - NULL, true); + NULL); break; case NETDEV_REGISTER: + case NETDEV_CHANGENAME: /* Set the netdev on top of previously set type. Note this * event happens also during net namespace change so here * we take into account netdev pointer appearing in this * namespace. */ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, - netdev, true); + netdev); break; case NETDEV_UNREGISTER: /* Clear netdev pointer, but not the type. This event happens @@ -10178,14 +10164,14 @@ static int devlink_netdevice_event(struct notifier_block *nb, * pointer to netdev that is going to another net namespace. */ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, - NULL, true); + NULL); break; case NETDEV_PRE_UNINIT: /* Clear the type and the netdev pointer. Happens one during * netdevice unregister. */ __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, - NULL, true); + NULL); break; } -- cgit v1.2.3 From dca56c3038c34a3e5acfe0aadb1f2bc9d724ae79 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:11 +0100 Subject: net: expose devlink port over rtnetlink Expose devlink port handle related to netdev over rtnetlink. Introduce a new nested IFLA attribute to carry the info. Call into devlink code to fill-up the nest with existing devlink attributes that are used over devlink netlink. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 14 ++++++++++++++ include/uapi/linux/if_link.h | 2 ++ net/core/devlink.c | 18 ++++++++++++++++++ net/core/rtnetlink.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index 7befad57afd4..fa6e936af1a5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1873,6 +1873,9 @@ int devlink_compat_phys_port_name_get(struct net_device *dev, int devlink_compat_switch_id_get(struct net_device *dev, struct netdev_phys_item_id *ppid); +int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); +size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); + #else static inline struct devlink *devlink_try_get(struct devlink *devlink) @@ -1909,6 +1912,17 @@ devlink_compat_switch_id_get(struct net_device *dev, return -EOPNOTSUPP; } +static inline int +devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) +{ + return 0; +} + +static inline size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d92b3f79eba3..1021a7e47a86 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,8 @@ enum { IFLA_TSO_MAX_SEGS, IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */ + IFLA_DEVLINK_PORT, + __IFLA_MAX }; diff --git a/net/core/devlink.c b/net/core/devlink.c index 3a454d0045e5..2dcf2bcc3527 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -880,6 +880,24 @@ nla_put_failure: return -EMSGSIZE; } +int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) +{ + if (devlink_nl_put_handle(msg, devlink_port->devlink)) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + return -EMSGSIZE; + return 0; +} + +size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) +{ + struct devlink *devlink = devlink_port->devlink; + + return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */ + + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */ +} + struct devlink_reload_combination { enum devlink_reload_action action; enum devlink_reload_limit limit; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b64fffeb3844..64289bc98887 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "dev.h" @@ -1038,6 +1039,16 @@ static size_t rtnl_proto_down_size(const struct net_device *dev) return size; } +static size_t rtnl_devlink_port_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DEVLINK_PORT */ + + if (dev->devlink_port) + size += devlink_nl_port_handle_size(dev->devlink_port); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1091,6 +1102,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MAX_MTU */ + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + + rtnl_devlink_port_size(dev) + 0; } @@ -1728,6 +1740,30 @@ nla_put_failure: return -EMSGSIZE; } +static int rtnl_fill_devlink_port(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *devlink_port_nest; + int ret; + + devlink_port_nest = nla_nest_start(skb, IFLA_DEVLINK_PORT); + if (!devlink_port_nest) + return -EMSGSIZE; + + if (dev->devlink_port) { + ret = devlink_nl_port_handle_fill(skb, dev->devlink_port); + if (ret < 0) + goto nest_cancel; + } + + nla_nest_end(skb, devlink_port_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, devlink_port_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1865,6 +1901,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, dev->dev.parent->bus->name)) goto nla_put_failure; + if (rtnl_fill_devlink_port(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; -- cgit v1.2.3 From 2640a82bbc08393c846c7b55178079bb8ca31a8c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 8 Nov 2022 11:47:10 +0100 Subject: devlink: Add packet traps for 802.1X operation Add packet traps for 802.1X operation. The "eapol" control trap is used to trap EAPOL packets and is required for the correct operation of the control plane. The "locked_port" drop trap can be enabled to gain visibility into packets that were dropped by the device due to the locked bridge port check. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/devlink-trap.rst | 13 +++++++++++++ include/net/devlink.h | 9 +++++++++ net/core/devlink.c | 3 +++ 3 files changed, 25 insertions(+) (limited to 'include/net/devlink.h') diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index 90d1381b88de..2c14dfe69b3a 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -485,6 +485,16 @@ be added to the following table: - Traps incoming packets that the device decided to drop because the destination MAC is not configured in the MAC table and the interface is not in promiscuous mode + * - ``eapol`` + - ``control`` + - Traps "Extensible Authentication Protocol over LAN" (EAPOL) packets + specified in IEEE 802.1X + * - ``locked_port`` + - ``drop`` + - Traps packets that the device decided to drop because they failed the + locked bridge port check. That is, packets that were received via a + locked port and whose {SMAC, VID} does not correspond to an FDB entry + pointing to the port Driver-specific Packet Traps ============================ @@ -589,6 +599,9 @@ narrow. The description of these groups must be added to the following table: * - ``parser_error_drops`` - Contains packet traps for packets that were marked by the device during parsing as erroneous + * - ``eapol`` + - Contains packet traps for "Extensible Authentication Protocol over LAN" + (EAPOL) packets specified in IEEE 802.1X Packet Trap Policers ==================== diff --git a/include/net/devlink.h b/include/net/devlink.h index fa6e936af1a5..611a23a3deb2 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -894,6 +894,8 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, + DEVLINK_TRAP_GENERIC_ID_EAPOL, + DEVLINK_TRAP_GENERIC_ID_LOCKED_PORT, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -930,6 +932,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE, DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP, DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_EAPOL, /* Add new generic trap group IDs above */ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, @@ -1121,6 +1124,10 @@ enum devlink_trap_group_generic_id { "blackhole_nexthop" #define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ "dmac_filter" +#define DEVLINK_TRAP_GENERIC_NAME_EAPOL \ + "eapol" +#define DEVLINK_TRAP_GENERIC_NAME_LOCKED_PORT \ + "locked_port" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -1174,6 +1181,8 @@ enum devlink_trap_group_generic_id { "acl_trap" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \ "parser_error_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EAPOL \ + "eapol" #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \ _metadata_cap) \ diff --git a/net/core/devlink.c b/net/core/devlink.c index ea0b319385fc..6bbe230c4ec5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -11734,6 +11734,8 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(ESP_PARSING, DROP), DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP), DEVLINK_TRAP(DMAC_FILTER, DROP), + DEVLINK_TRAP(EAPOL, CONTROL), + DEVLINK_TRAP(LOCKED_PORT, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ @@ -11769,6 +11771,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(ACL_SAMPLE), DEVLINK_TRAP_GROUP(ACL_TRAP), DEVLINK_TRAP_GROUP(PARSER_ERROR_DROPS), + DEVLINK_TRAP_GROUP(EAPOL), }; static int devlink_trap_generic_verify(const struct devlink_trap *trap) -- cgit v1.2.3 From cd502236835b678738810ecd501c85a3a7a11150 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:15 +0100 Subject: devlink: Introduce new attribute 'tx_priority' to devlink-rate To fully utilize offload capabilities of Intel 100G card QoS capabilities new attribute 'tx_priority' needs to be introduced. This attribute allows for usage of strict priority arbiter among siblings. This arbitration scheme attempts to schedule nodes based on their priority as long as the nodes remain within their bandwidth limit. Introduce new attribute in devlink-rate that will allow for configuration of strict priority. New attribute is optional. Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 6 ++++++ include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index 611a23a3deb2..90d59d673cb1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -114,6 +114,8 @@ struct devlink_rate { refcount_t refcnt; }; }; + + u32 tx_priority; }; struct devlink_port { @@ -1511,10 +1513,14 @@ struct devlink_ops { u64 tx_share, struct netlink_ext_ack *extack); int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack); int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack); int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 2f24b53a87a5..1a9214d35ef5 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -607,6 +607,7 @@ enum devlink_attr { DEVLINK_ATTR_SELFTESTS, /* nested */ + DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 7f789bbcbbd7..bf6d3a3c28bb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1203,6 +1203,9 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, devlink_rate->tx_max, DEVLINK_ATTR_PAD)) goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY, + devlink_rate->tx_priority)) + goto nla_put_failure; if (devlink_rate->parent) if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, devlink_rate->parent->name)) @@ -1936,6 +1939,7 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, { struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; + u32 priority; u64 rate; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { @@ -1964,6 +1968,20 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, devlink_rate->tx_max = rate; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]) { + priority = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_PRIORITY]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_priority_set(devlink_rate, devlink_rate->priv, + priority, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_priority_set(devlink_rate, devlink_rate->priv, + priority, info->extack); + + if (err) + return err; + devlink_rate->tx_priority = priority; + } + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; if (nla_parent) { err = devlink_nl_rate_parent_node_set(devlink_rate, info, @@ -1995,6 +2013,12 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], + "TX priority set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); @@ -2009,6 +2033,12 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_PRIORITY], + "TX priority set isn't supported for the nodes"); + return false; + } } else { WARN(1, "Unknown type of rate object"); return false; @@ -9187,6 +9217,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { -- cgit v1.2.3 From 6e2d7e84fcfee62d70e62aa9e469d10b8b6a7dc7 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:16 +0100 Subject: devlink: Introduce new attribute 'tx_weight' to devlink-rate To fully utilize offload capabilities of Intel 100G card QoS capabilities new attribute 'tx_weight' needs to be introduced. This attribute allows for usage of Weighted Fair Queuing arbitration scheme among siblings. This arbitration scheme can be used simultaneously with the strict priority. Introduce new attribute in devlink-rate that will allow for configuration of Weighted Fair Queueing. New attribute is optional. Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 5 +++++ include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index 90d59d673cb1..366b23d3f973 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -116,6 +116,7 @@ struct devlink_rate { }; u32 tx_priority; + u32 tx_weight; }; struct devlink_port { @@ -1515,12 +1516,16 @@ struct devlink_ops { u64 tx_max, struct netlink_ext_ack *extack); int (*rate_leaf_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, u32 tx_priority, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack); int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); int (*rate_node_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, u32 tx_priority, struct netlink_ext_ack *extack); + int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack); int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1a9214d35ef5..498d0d5d0957 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -608,6 +608,8 @@ enum devlink_attr { DEVLINK_ATTR_SELFTESTS, /* nested */ DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ + DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index bf6d3a3c28bb..525bdf426163 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1206,6 +1206,11 @@ static int devlink_nl_rate_fill(struct sk_buff *msg, if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_PRIORITY, devlink_rate->tx_priority)) goto nla_put_failure; + + if (nla_put_u32(msg, DEVLINK_ATTR_RATE_TX_WEIGHT, + devlink_rate->tx_weight)) + goto nla_put_failure; + if (devlink_rate->parent) if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, devlink_rate->parent->name)) @@ -1940,6 +1945,7 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, struct nlattr *nla_parent, **attrs = info->attrs; int err = -EOPNOTSUPP; u32 priority; + u32 weight; u64 rate; if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) { @@ -1982,6 +1988,20 @@ static int devlink_nl_rate_set(struct devlink_rate *devlink_rate, devlink_rate->tx_priority = priority; } + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]) { + weight = nla_get_u32(attrs[DEVLINK_ATTR_RATE_TX_WEIGHT]); + if (devlink_rate_is_leaf(devlink_rate)) + err = ops->rate_leaf_tx_weight_set(devlink_rate, devlink_rate->priv, + weight, info->extack); + else if (devlink_rate_is_node(devlink_rate)) + err = ops->rate_node_tx_weight_set(devlink_rate, devlink_rate->priv, + weight, info->extack); + + if (err) + return err; + devlink_rate->tx_weight = weight; + } + nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME]; if (nla_parent) { err = devlink_nl_rate_parent_node_set(devlink_rate, info, @@ -2019,6 +2039,12 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, "TX priority set isn't supported for the leafs"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_leaf_tx_weight_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], + "TX weight set isn't supported for the leafs"); + return false; + } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); @@ -2039,6 +2065,12 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, "TX priority set isn't supported for the nodes"); return false; } + if (attrs[DEVLINK_ATTR_RATE_TX_WEIGHT] && !ops->rate_node_tx_weight_set) { + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[DEVLINK_ATTR_RATE_TX_WEIGHT], + "TX weight set isn't supported for the nodes"); + return false; + } } else { WARN(1, "Unknown type of rate object"); return false; @@ -9218,6 +9250,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { -- cgit v1.2.3 From caba177d7f4d7693a9157ece8c9a30944c949e34 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:17 +0100 Subject: devlink: Enable creation of the devlink-rate nodes from the driver Intel 100G card internal firmware hierarchy for Hierarchicial QoS is very rigid and can't be easily removed. This requires an ability to export default hierarchy to allow user to modify it. Currently the driver is only able to create the 'leaf' nodes, which usually represent the vport. This is not enough for HQoS implemented in Intel hardware. Introduce new function devl_rate_node_create() that allows for creation of the devlink-rate nodes from the driver. Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 3 +++ net/core/devlink.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/net/devlink.h') diff --git a/include/net/devlink.h b/include/net/devlink.h index 366b23d3f973..339a2ed02d36 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1618,6 +1618,9 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); int devl_rate_leaf_create(struct devlink_port *port, void *priv); +struct devlink_rate * +devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, + struct devlink_rate *parent); void devl_rate_leaf_destroy(struct devlink_port *devlink_port); void devl_rate_nodes_destroy(struct devlink *devlink); void devlink_port_linecard_set(struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index 525bdf426163..3dfee7cd9929 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10384,6 +10384,51 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +/** + * devl_rate_node_create - create devlink rate node + * @devlink: devlink instance + * @priv: driver private data + * @node_name: name of the resulting node + * @parent: parent devlink_rate struct + * + * Create devlink rate object of type node + */ +struct devlink_rate * +devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, + struct devlink_rate *parent) +{ + struct devlink_rate *rate_node; + + rate_node = devlink_rate_node_get_by_name(devlink, node_name); + if (!IS_ERR(rate_node)) + return ERR_PTR(-EEXIST); + + rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL); + if (!rate_node) + return ERR_PTR(-ENOMEM); + + if (parent) { + rate_node->parent = parent; + refcount_inc(&rate_node->parent->refcnt); + } + + rate_node->type = DEVLINK_RATE_TYPE_NODE; + rate_node->devlink = devlink; + rate_node->priv = priv; + + rate_node->name = kstrdup(node_name, GFP_KERNEL); + if (!rate_node->name) { + kfree(rate_node); + return ERR_PTR(-ENOMEM); + } + + refcount_set(&rate_node->refcnt, 1); + list_add(&rate_node->list, &devlink->rate_list); + devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW); + return rate_node; +} +EXPORT_SYMBOL_GPL(devl_rate_node_create); + /** * devl_rate_leaf_create - create devlink rate leaf * @devlink_port: devlink port object to create rate object on -- cgit v1.2.3 From f2fc15e271f2d17f2bee2c5a3b3e50252a7ba91f Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:19 +0100 Subject: devlink: Allow to set up parent in devl_rate_leaf_create() Currently the driver is able to create leaf nodes for the devlink-rate, but is unable to set parent for them. This wasn't as issue before the possibility to export hierarchy from the driver. After adding the export feature, in order for the driver to supply correct hierarchy, it's necessary for it to be able to supply a parent name to devl_rate_leaf_create(). Introduce a new parameter 'parent_name' in devl_rate_leaf_create(). Signed-off-by: Michal Wilczynski Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 ++-- drivers/net/netdevsim/dev.c | 2 +- include/net/devlink.h | 4 +++- net/core/devlink.c | 9 ++++++++- 4 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/net/devlink.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 9bc7be95db54..084a910bb4e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -91,7 +91,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ if (err) goto reg_err; - err = devl_rate_leaf_create(dl_port, vport); + err = devl_rate_leaf_create(dl_port, vport, NULL); if (err) goto rate_err; @@ -160,7 +160,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (err) return err; - err = devl_rate_leaf_create(dl_port, vport); + err = devl_rate_leaf_create(dl_port, vport, NULL); if (err) goto rate_err; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 705872eb7564..e14686594a71 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1401,7 +1401,7 @@ static int __nsim_dev_port_add(struct nsim_dev *nsim_dev, enum nsim_dev_port_typ if (nsim_dev_port_is_vf(nsim_dev_port)) { err = devl_rate_leaf_create(&nsim_dev_port->devlink_port, - nsim_dev_port); + nsim_dev_port, NULL); if (err) goto err_nsim_destroy; } diff --git a/include/net/devlink.h b/include/net/devlink.h index 339a2ed02d36..074a79b8933f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1617,10 +1617,12 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); -int devl_rate_leaf_create(struct devlink_port *port, void *priv); struct devlink_rate * devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, struct devlink_rate *parent); +int +devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv, + struct devlink_rate *parent); void devl_rate_leaf_destroy(struct devlink_port *devlink_port); void devl_rate_nodes_destroy(struct devlink *devlink); void devlink_port_linecard_set(struct devlink_port *devlink_port, diff --git a/net/core/devlink.c b/net/core/devlink.c index 61d431578f5f..d93bc95cd7cb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -10435,10 +10435,12 @@ EXPORT_SYMBOL_GPL(devl_rate_node_create); * devl_rate_leaf_create - create devlink rate leaf * @devlink_port: devlink port object to create rate object on * @priv: driver private data + * @parent: parent devlink_rate struct * * Create devlink rate object of type leaf on provided @devlink_port. */ -int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) +int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv, + struct devlink_rate *parent) { struct devlink *devlink = devlink_port->devlink; struct devlink_rate *devlink_rate; @@ -10452,6 +10454,11 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv) if (!devlink_rate) return -ENOMEM; + if (parent) { + devlink_rate->parent = parent; + refcount_inc(&devlink_rate->parent->refcnt); + } + devlink_rate->type = DEVLINK_RATE_TYPE_LEAF; devlink_rate->devlink = devlink; devlink_rate->devlink_port = devlink_port; -- cgit v1.2.3 From af6397c9ee2b42988c912dcad2fca1f43d5c1c99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 28 Nov 2022 12:36:44 -0800 Subject: devlink: support directly reading from region memory To read from a region, user space must currently request a new snapshot of the region and then read from that snapshot. This can sometimes be overkill if user space only reads a tiny portion. They first create the snapshot, then request a read, then destroy the snapshot. For regions which have a single underlying "contents", it makes sense to allow supporting direct reading of the region data. Extend the DEVLINK_CMD_REGION_READ to allow direct reading from a region if requested via the new DEVLINK_ATTR_REGION_DIRECT. If this attribute is set, then perform a direct read instead of using a snapshot. Direct read is mutually exclusive with DEVLINK_ATTR_REGION_SNAPSHOT_ID, and care is taken to ensure that we reject commands which provide incorrect attributes. Regions must enable support for direct read by implementing the .read() callback function. If a region does not support such direct reads, a suitable extended error message is reported. Signed-off-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-region.rst | 13 ++++ include/net/devlink.h | 16 +++++ include/uapi/linux/devlink.h | 2 + net/core/devlink.c | 80 +++++++++++++++++----- 4 files changed, 94 insertions(+), 17 deletions(-) (limited to 'include/net/devlink.h') diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index f06dca9a1eb6..9232cd7da301 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -31,6 +31,15 @@ in its ``devlink_region_ops`` structure. If snapshot id is not set in the ``DEVLINK_CMD_REGION_NEW`` request kernel will allocate one and send the snapshot information to user space. +Regions may optionally allow directly reading from their contents without a +snapshot. Direct read requests are not atomic. In particular a read request +of size 256 bytes or larger will be split into multiple chunks. If atomic +access is required, use a snapshot. A driver wishing to enable this for a +region should implement the ``.read`` callback in the ``devlink_region_ops`` +structure. User space can request a direct read by using the +``DEVLINK_ATTR_REGION_DIRECT`` attribute instead of specifying a snapshot +id. + example usage ------------- @@ -65,6 +74,10 @@ example usage $ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0 length 16 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 + # Read from the region without a snapshot + $ devlink region read pci/0000:00:05.0/fw-health address 16 length 16 + 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8 + As regions are likely very device or driver specific, no generic regions are defined. See the driver-specific documentation files for information on the specific regions a driver supports. diff --git a/include/net/devlink.h b/include/net/devlink.h index 074a79b8933f..02528f736f65 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -650,6 +650,10 @@ struct devlink_info_req; * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_region_ops { @@ -659,6 +663,10 @@ struct devlink_region_ops { const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -670,6 +678,10 @@ struct devlink_region_ops { * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_port_region_ops { @@ -679,6 +691,10 @@ struct devlink_port_region_ops { const struct devlink_port_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 498d0d5d0957..70191d96af89 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -610,6 +610,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + DEVLINK_ATTR_REGION_DIRECT, /* flag */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 6c05cfaa571d..298041a44aa8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6515,6 +6515,26 @@ devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size, return 0; } +static int +devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->port_ops->read(region->port, region->port_ops, extack, + curr_offset, chunk_size, chunk); +} + +static int +devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->ops->read(region->devlink, region->ops, extack, + curr_offset, chunk_size, chunk); +} + static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { @@ -6523,12 +6543,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = info->attrs; struct devlink_port *port = NULL; - struct devlink_snapshot *snapshot; + devlink_chunk_fill_t *region_cb; struct devlink_region *region; const char *region_name; struct devlink *devlink; unsigned int index; - u32 snapshot_id; + void *region_cb_priv; void *hdr; int err; @@ -6546,12 +6566,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_unlock; } - if (!attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { - NL_SET_ERR_MSG(cb->extack, "No snapshot id provided"); - err = -EINVAL; - goto out_unlock; - } - if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); @@ -6577,12 +6591,43 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, } snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; - snapshot_id = nla_get_u32(snapshot_attr); - snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) { - NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist"); - err = -EINVAL; - goto out_unlock; + if (!snapshot_attr) { + if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG(cb->extack, "No snapshot id provided"); + err = -EINVAL; + goto out_unlock; + } + + if (!region->ops->read) { + NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read"); + err = -EOPNOTSUPP; + goto out_unlock; + } + + if (port) + region_cb = &devlink_region_port_direct_fill; + else + region_cb = &devlink_region_direct_fill; + region_cb_priv = region; + } else { + struct devlink_snapshot *snapshot; + u32 snapshot_id; + + if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot"); + err = -EINVAL; + goto out_unlock; + } + + snapshot_id = nla_get_u32(snapshot_attr); + snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); + if (!snapshot) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist"); + err = -EINVAL; + goto out_unlock; + } + region_cb = &devlink_region_snapshot_fill; + region_cb_priv = snapshot; } if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && @@ -6633,9 +6678,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; } - err = devlink_nl_region_read_fill(skb, &devlink_region_snapshot_fill, - snapshot, start_offset, end_offset, - &ret_offset, cb->extack); + err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv, + start_offset, end_offset, &ret_offset, + cb->extack); if (err && err != -EMSGSIZE) goto nla_put_failure; @@ -9280,6 +9325,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, }; static const struct genl_small_ops devlink_nl_ops[] = { -- cgit v1.2.3 From 226bf980550627c88549b112ac6c8fb40873afb4 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 29 Nov 2022 18:51:38 +0900 Subject: net: devlink: let the core report the driver name instead of the drivers The driver name is available in device_driver::name. Right now, drivers still have to report this piece of information themselves in their devlink_ops::info_get callback function. In order to factorize code, make devlink_nl_info_fill() add the driver name attribute. Now that the core sets the driver name attribute, drivers are not supposed to call devlink_info_driver_name_put() anymore. Remove devlink_info_driver_name_put() and clean-up all the drivers using this function in their callback. Signed-off-by: Vincent Mailhol Tested-by: Ido Schimmel # mlxsw Reviewed-by: Jacob Keller Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- .../crypto/marvell/octeontx2/otx2_cpt_devlink.c | 4 ---- drivers/net/dsa/hirschmann/hellcreek.c | 5 ----- drivers/net/dsa/mv88e6xxx/devlink.c | 5 ----- drivers/net/dsa/sja1105/sja1105_devlink.c | 12 +++------- drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 4 ---- .../ethernet/freescale/dpaa2/dpaa2-eth-devlink.c | 11 +-------- .../net/ethernet/fungible/funeth/funeth_devlink.c | 2 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c | 5 ----- .../hisilicon/hns3/hns3vf/hclgevf_devlink.c | 5 ----- drivers/net/ethernet/intel/ice/ice_devlink.c | 6 ----- .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_devlink.c | 9 +------- .../ethernet/marvell/prestera/prestera_devlink.c | 5 ----- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 4 ---- drivers/net/ethernet/mellanox/mlxsw/core.c | 5 ----- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 4 ---- .../net/ethernet/pensando/ionic/ionic_devlink.c | 4 ---- drivers/net/ethernet/qlogic/qed/qed_devlink.c | 4 ---- drivers/net/netdevsim/dev.c | 3 --- drivers/ptp/ptp_ocp.c | 4 ---- include/net/devlink.h | 2 -- net/core/devlink.c | 26 +++++++++++++++------- 22 files changed, 25 insertions(+), 106 deletions(-) (limited to 'include/net/devlink.h') diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c index 7503f6b18ac5..a2aba0b0d68a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -76,10 +76,6 @@ static int otx2_cpt_devlink_info_get(struct devlink *dl, struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; int err; - err = devlink_info_driver_name_put(req, "rvu_cptpf"); - if (err) - return err; - err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, "fw.ae", OTX2_CPT_AE_TYPES); if (err) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 951f7935c872..595a548bb0a8 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1176,11 +1176,6 @@ static int hellcreek_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct hellcreek *hellcreek = ds->priv; - int ret; - - ret = devlink_info_driver_name_put(req, "hellcreek"); - if (ret) - return ret; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 1266eabee086..a08dab75e0c0 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -821,11 +821,6 @@ int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - int err; - - err = devlink_info_driver_name_put(req, "mv88e6xxx"); - if (err) - return err; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 10c6fea1227f..da532614f34a 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -120,16 +120,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; - int rc; - - rc = devlink_info_driver_name_put(req, "sja1105"); - if (rc) - return rc; - rc = devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, - priv->info->name); - return rc; + return devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + priv->info->name); } int sja1105_devlink_setup(struct dsa_switch *ds) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 8a6f788f6294..26913dc816d3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -892,10 +892,6 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, u32 ver = 0; int rc; - rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME); - if (rc) - return rc; - if (BNXT_PF(bp) && (bp->flags & BNXT_FLAG_DSN_VALID)) { sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 5c6dd3029e2f..76f808d38066 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -37,18 +37,9 @@ static int dpaa2_eth_dl_info_get(struct devlink *devlink, struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; char buf[10]; - int err; - - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; scnprintf(buf, 10, "%d.%d", priv->dpni_ver_major, priv->dpni_ver_minor); - err = devlink_info_version_running_put(req, "dpni", buf); - if (err) - return err; - - return 0; + return devlink_info_version_running_put(req, "dpni", buf); } static struct dpaa2_eth_trap_item * diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c index d50c222948b4..6668375edff6 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -6,7 +6,7 @@ static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) { - return devlink_info_driver_name_put(req, KBUILD_MODNAME); + return 0; } static const struct devlink_ops fun_dl_ops = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 4c441e6a5082..3d3b69605423 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -13,11 +13,6 @@ static int hclge_devlink_info_get(struct devlink *devlink, struct hclge_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; struct hclge_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index fdc19868b818..a6c3c5e8f0ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -13,11 +13,6 @@ static int hclgevf_devlink_info_get(struct devlink *devlink, struct hclgevf_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN]; struct hclgevf_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 946d64e577c9..8286e47b4bae 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -311,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink, } } - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); - goto out_free_ctx; - } - ice_info_get_dsn(pf, ctx); err = devlink_info_serial_number_put(req, ctx->buf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 88dee589cb21..f15439d26d21 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1550,7 +1550,7 @@ static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { - return devlink_info_driver_name_put(req, DRV_NAME); + return 0; } static const struct devlink_ops rvu_devlink_ops = { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 777a27047c8e..5cc6416cf1a6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -77,18 +77,11 @@ static const struct devlink_param otx2_dl_params[] = { otx2_dl_mcam_count_validate), }; -/* Devlink OPs */ static int otx2_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) { - struct otx2_devlink *otx2_dl = devlink_priv(devlink); - struct otx2_nic *pfvf = otx2_dl->pfvf; - - if (is_otx2_vf(pfvf->pcifunc)) - return devlink_info_driver_name_put(req, "rvu_nicvf"); - - return devlink_info_driver_name_put(req, "rvu_nicpf"); + return 0; } static const struct devlink_ops otx2_devlink_ops = { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index 84ad05c9f12d..2a4c9df4eb79 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -355,11 +355,6 @@ static int prestera_dl_info_get(struct devlink *dl, { struct prestera_switch *sw = devlink_priv(dl); char buf[16]; - int err; - - err = devlink_info_driver_name_put(req, PRESTERA_DRV_NAME); - if (err) - return err; snprintf(buf, sizeof(buf), "%d.%d.%d", sw->dev->fw_rev.maj, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index cc2ae427dcb0..751bc4a9edcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,10 +46,6 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index a83f6bc30072..a0a06e2eff82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1459,11 +1459,6 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, - mlxsw_core->bus_info->device_kind); - if (err) - return err; - mlxsw_reg_mgir_pack(mgir_pl); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl); if (err) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 784f23602a8a..bf6bae557158 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -239,10 +239,6 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char *buf = NULL; int err; - err = devlink_info_driver_name_put(req, "nfp"); - if (err) - return err; - vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index 567f778433e2..e6ff757895ab 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -26,10 +26,6 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, char buf[16]; int err = 0; - err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); - if (err) - return err; - err = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, idev->dev_info.fw_version); diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index 6bb4e165b592..922c47797af6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -162,10 +162,6 @@ static int qed_devlink_info_get(struct devlink *devlink, dev_info = &cdev->common_dev_info; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num)); buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e14686594a71..b962fc8e1397 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -994,9 +994,6 @@ static int nsim_dev_info_get(struct devlink *devlink, { int err; - err = devlink_info_driver_name_put(req, DRV_NAME); - if (err) - return err; err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30", DEVLINK_INFO_VERSION_TYPE_COMPONENT); if (err) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 154d58cbd9ce..4bbaccd543ad 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1647,10 +1647,6 @@ ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - fw_image = bp->fw_loader ? "loader" : "fw"; sprintf(buf, "%d.%d", bp->fw_tag, bp->fw_version); err = devlink_info_version_running_put(req, fw_image, buf); diff --git a/include/net/devlink.h b/include/net/devlink.h index 02528f736f65..5f6eca5e4a40 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1762,8 +1762,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn); -int devlink_info_driver_name_put(struct devlink_info_req *req, - const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); diff --git a/net/core/devlink.c b/net/core/devlink.c index 298041a44aa8..60eb0f46520f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6707,14 +6707,6 @@ out_unlock: return err; } -int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - if (!req->msg) - return 0; - return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); -} -EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); - int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) @@ -6823,11 +6815,25 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); +static int devlink_nl_driver_info_get(struct device_driver *drv, + struct devlink_info_req *req) +{ + if (!drv) + return 0; + + if (drv->name[0]) + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, + drv->name); + + return 0; +} + static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { + struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; @@ -6845,6 +6851,10 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, if (err) goto err_cancel_msg; + err = devlink_nl_driver_info_get(dev->driver, &req); + if (err) + goto err_cancel_msg; + genlmsg_end(msg, hdr); return 0; -- cgit v1.2.3 From da65e9ff3bf614d2836e38e1d405c7073e6ba3b7 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 20:51:15 +0200 Subject: devlink: Expose port function commands to control RoCE Expose port function commands to enable / disable RoCE, this is used to control the port RoCE device capabilities. When RoCE is disabled for a function of the port, function cannot create any RoCE specific resources (e.g GID table). It also saves system memory utilization. For example disabling RoCE enable a VF/SF saves 1 Mbytes of system memory per function. Example of a PCI VF port which supports function configuration: Set RoCE of the VF's port function. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce enable $ devlink port function set pci/0000:06:00.0/2 roce disable $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 roce disable Signed-off-by: Shay Drory Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/devlink-port.rst | 34 ++++++- include/net/devlink.h | 18 ++++ include/uapi/linux/devlink.h | 10 ++ net/core/devlink.c | 113 ++++++++++++++++++++++ 4 files changed, 174 insertions(+), 1 deletion(-) (limited to 'include/net/devlink.h') diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index 2c637f4aae8e..c3302d23e480 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -110,7 +110,7 @@ devlink ports for both the controllers. Function configuration ====================== -A user can configure the function attribute before enumerating the PCI +Users can configure one or more function attributes before enumerating the PCI function. Usually it means, user should configure function attribute before a bus specific device for the function is created. However, when SRIOV is enabled, virtual function devices are created on the PCI bus. @@ -122,6 +122,9 @@ A user may set the hardware address of the function using `devlink port function set hw_addr` command. For Ethernet port function this means a MAC address. +Users may also set the RoCE capability of the function using +`devlink port function set roce` command. + Function attributes =================== @@ -162,6 +165,35 @@ device created for the PCI VF/SF. function: hw_addr 00:00:00:00:88:88 +RoCE capability setup +--------------------- +Not all PCI VFs/SFs require RoCE capability. + +When RoCE capability is disabled, it saves system memory per PCI VF/SF. + +When user disables RoCE capability for a VF/SF, user application cannot send or +receive any RoCE packets through this VF/SF and RoCE GID table for this PCI +will be empty. + +When RoCE capability is disabled in the device using port function attribute, +VF/SF driver cannot override it. + +- Get RoCE capability of the VF device:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 roce enable + +- Set RoCE capability of the VF device:: + + $ devlink port function set pci/0000:06:00.0/2 roce disable + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 roce disable + Subfunction ============ diff --git a/include/net/devlink.h b/include/net/devlink.h index 5f6eca5e4a40..ce4c65d2f2e7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1451,6 +1451,24 @@ struct devlink_ops { int (*port_function_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + /** + * @port_fn_roce_get: Port function's roce get function. + * + * Query RoCE state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_roce_set: Port function's roce set function. + * + * Enable/Disable the RoCE state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 70191d96af89..6cc2925bd478 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -658,11 +658,21 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_fn_attr_cap { + DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + + /* Add new caps above */ + __DEVLINK_PORT_FN_ATTR_CAPS_MAX, +}; + +#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) + enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 diff --git a/net/core/devlink.c b/net/core/devlink.c index 035249c5dd17..8c0ad52431c5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); +#define DEVLINK_PORT_FN_CAPS_VALID_MASK \ + (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1) + static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE, DEVLINK_PORT_FN_STATE_ACTIVE), + [DEVLINK_PORT_FN_ATTR_CAPS] = + NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), }; static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { @@ -680,6 +685,60 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, return 0; } +static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps, + u32 cap, bool is_enable) +{ + caps->selector |= cap; + if (is_enable) + caps->value |= cap; +} + +static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct nla_bitfield32 *caps, + struct netlink_ext_ack *extack) +{ + bool is_enable; + int err; + + if (!ops->port_fn_roce_get) + return 0; + + err = ops->port_fn_roce_get(devlink_port, &is_enable, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable); + return 0; +} + +static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) +{ + struct nla_bitfield32 caps = {}; + int err; + + err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack); + if (err) + return err; + + if (!caps.selector) + return 0; + err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value, + caps.selector); + if (err) + return err; + + *msg_updated = true; + return 0; +} + static int devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, struct genl_info *info, @@ -1263,6 +1322,35 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, return 0; } +static int +devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable, + struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + return ops->port_fn_roce_set(devlink_port, enable, extack); +} + +static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 caps; + u32 caps_value; + int err; + + caps = nla_get_bitfield32(attr); + caps_value = caps.value & caps.selector; + if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) { + err = devlink_port_fn_roce_set(devlink_port, + caps_value & DEVLINK_PORT_FN_CAP_ROCE, + extack); + if (err) + return err; + } + return 0; +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -1281,6 +1369,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por &msg_updated); if (err) goto out; + err = devlink_port_fn_caps_fill(ops, port, msg, extack, + &msg_updated); + if (err) + goto out; err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) @@ -1653,6 +1745,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, struct netlink_ext_ack *extack) { const struct devlink_ops *ops = devlink_port->devlink->ops; + struct nlattr *attr; if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] && !ops->port_function_hw_addr_set) { @@ -1665,6 +1758,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, "Function does not support state setting"); return -EOPNOTSUPP; } + attr = tb[DEVLINK_PORT_FN_ATTR_CAPS]; + if (attr) { + struct nla_bitfield32 caps; + + caps = nla_get_bitfield32(attr); + if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE && + !ops->port_fn_roce_set) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Port doesn't support RoCE function attribute"); + return -EOPNOTSUPP; + } + } return 0; } @@ -1692,6 +1797,14 @@ static int devlink_port_function_set(struct devlink_port *port, if (err) return err; } + + attr = tb[DEVLINK_PORT_FN_ATTR_CAPS]; + if (attr) { + err = devlink_port_fn_caps_set(port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when * multiple port function attributes are set along with state, * Those can be applied first before activating the state. -- cgit v1.2.3 From a8ce7b26a51efc4d7753b23d639ae092878a6193 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Dec 2022 20:51:18 +0200 Subject: devlink: Expose port function commands to control migratable Expose port function commands to enable / disable migratable capability, this is used to set the port function as migratable. Live migration is the process of transferring a live virtual machine from one physical host to another without disrupting its normal operation. In order for a VM to be able to perform LM, all the VM components must be able to perform migration. e.g.: to be migratable. In order for VF to be migratable, VF must be bound to VFIO driver with migration support. When migratable capability is enabled for a function of the port, the device is making the necessary preparations for the function to be migratable, which might include disabling features which cannot be migrated. Example of LM with migratable function configuration: Set migratable of the VF's port function. $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 migratable disable $ devlink port function set pci/0000:06:00.0/2 migratable enable $ devlink port show pci/0000:06:00.0/2 pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 function: hw_addr 00:00:00:00:00:00 migratable enable Bind VF to VFIO driver with migration support: $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/unbind $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/bind Attach VF to the VM. Start the VM. Perform LM. Signed-off-by: Shay Drory Reviewed-by: Jiri Pirko Acked-by: Shannon Nelson Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/devlink-port.rst | 46 +++++++++++++++++++ include/net/devlink.h | 21 +++++++++ include/uapi/linux/devlink.h | 3 ++ net/core/devlink.c | 55 +++++++++++++++++++++++ 4 files changed, 125 insertions(+) (limited to 'include/net/devlink.h') diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index c3302d23e480..3da590953ce8 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -125,6 +125,9 @@ this means a MAC address. Users may also set the RoCE capability of the function using `devlink port function set roce` command. +Users may also set the function as migratable using +'devlink port function set migratable' command. + Function attributes =================== @@ -194,6 +197,49 @@ VF/SF driver cannot override it. function: hw_addr 00:00:00:00:00:00 roce disable +migratable capability setup +--------------------------- +Live migration is the process of transferring a live virtual machine +from one physical host to another without disrupting its normal +operation. + +User who want PCI VFs to be able to perform live migration need to +explicitly enable the VF migratable capability. + +When user enables migratable capability for a VF, and the HV binds the VF to VFIO driver +with migration support, the user can migrate the VM with this VF from one HV to a +different one. + +However, when migratable capability is enable, device will disable features which cannot +be migrated. Thus migratable cap can impose limitations on a VF so let the user decide. + +Example of LM with migratable function configuration: +- Get migratable capability of the VF device:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 migratable disable + +- Set migratable capability of the VF device:: + + $ devlink port function set pci/0000:06:00.0/2 migratable enable + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 migratable enable + +- Bind VF to VFIO driver with migration support:: + + $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/unbind + $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override + $ echo > /sys/bus/pci/devices/0000:08:00.0/driver/bind + +Attach VF to the VM. +Start the VM. +Perform live migration. + Subfunction ============ diff --git a/include/net/devlink.h b/include/net/devlink.h index ce4c65d2f2e7..0f376a28b9c4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1469,6 +1469,27 @@ struct devlink_ops { */ int (*port_fn_roce_set)(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_get: Port function's migratable get function. + * + * Query migratable state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_set: Port function's migratable set function. + * + * Enable/Disable migratable state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6cc2925bd478..3782d4219ac9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -660,12 +660,15 @@ enum devlink_resource_unit { enum devlink_port_fn_attr_cap { DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT, /* Add new caps above */ __DEVLINK_PORT_FN_ATTR_CAPS_MAX, }; #define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) +#define DEVLINK_PORT_FN_CAP_MIGRATABLE \ + _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT) enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, diff --git a/net/core/devlink.c b/net/core/devlink.c index 8c0ad52431c5..ab40ebcb4aea 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -715,6 +715,29 @@ static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, return 0; } +static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct nla_bitfield32 *caps, + struct netlink_ext_ack *extack) +{ + bool is_enable; + int err; + + if (!ops->port_fn_migratable_get || + devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) + return 0; + + err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable); + return 0; +} + static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, struct devlink_port *devlink_port, struct sk_buff *msg, @@ -728,6 +751,10 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, if (err) return err; + err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack); + if (err) + return err; + if (!caps.selector) return 0; err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value, @@ -1322,6 +1349,15 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, return 0; } +static int +devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable, + struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + return ops->port_fn_migratable_set(devlink_port, enable, extack); +} + static int devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack) @@ -1348,6 +1384,13 @@ static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, if (err) return err; } + if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) { + err = devlink_port_fn_mig_set(devlink_port, caps_value & + DEVLINK_PORT_FN_CAP_MIGRATABLE, + extack); + if (err) + return err; + } return 0; } @@ -1769,6 +1812,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, "Port doesn't support RoCE function attribute"); return -EOPNOTSUPP; } + if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) { + if (!ops->port_fn_migratable_set) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Port doesn't support migratable function attribute"); + return -EOPNOTSUPP; + } + if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "migratable function attribute supported for VFs only"); + return -EOPNOTSUPP; + } + } } return 0; } -- cgit v1.2.3 From 01d80532295cd359dc43e6bd71860d5515f84372 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Thu, 1 Dec 2022 02:46:55 +0900 Subject: net: devlink: add DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER As discussed in [1], abbreviating the bootloader to "bl" might not be well understood. Instead, a bootloader technically being a firmware, name it "fw.bootloader". Add a new macro to devlink.h to formalize this new info attribute name and update the documentation. [1] https://lore.kernel.org/netdev/20221128142723.2f826d20@kernel.org/ Suggested-by: Jakub Kicinski Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/all/20221130174658.29282-5-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- Documentation/networking/devlink/devlink-info.rst | 5 +++++ include/net/devlink.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include/net/devlink.h') diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst index 7572bf6de5c1..1242b0e6826b 100644 --- a/Documentation/networking/devlink/devlink-info.rst +++ b/Documentation/networking/devlink/devlink-info.rst @@ -198,6 +198,11 @@ fw.bundle_id Unique identifier of the entire firmware bundle. +fw.bootloader +------------- + +Version of the bootloader. + Future work =========== diff --git a/include/net/devlink.h b/include/net/devlink.h index 0f376a28b9c4..6a2e4f21779f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -621,6 +621,8 @@ enum devlink_param_generic_id { #define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce" /* Firmware bundle identifier */ #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id" +/* Bootloader */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER "fw.bootloader" /** * struct devlink_flash_update_params - Flash Update parameters -- cgit v1.2.3